diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..5969d5a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,8 @@ + +# Use this file to define individuals or teams that are responsible for code in a repository. +# Read more: + +# Order is important: the last matching pattern has the highest precedence + +# These owners will be the default owners for everything +* @anyscale/solutionarchitects diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 0000000..e1734b8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,78 @@ +name: Bug Report +description: Issue/Bug with the Terraform Modules. Please include repro steps! +title: "[bug]: " +labels: ["bug", "P1", "terraform-modules"] +assignees: ["brent-anyscale"] + +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to file a bug report. + Please make sure to search for existing issues before filing a new one. + - type: textarea + attributes: + label: Current Behavior + description: A clear description of what the bug is and how it manifests. + validations: + required: true + - type: textarea + attributes: + label: Expected Behavior + description: A clear description of what you expected to happen. + validations: + required: true + - type: textarea + attributes: + label: Steps to Reproduce + description: Please explain the steps required to duplicate this issue. + validations: + required: true + - type: checkboxes + attributes: + label: Which module(s)? + description: "Please select which module(s) this issue impacts." + options: + - label: Root Anyscale Module + - label: anyscale-k8s-configmap + - label: anyscale-k8s-helm + - label: anyscale-k8s-namespace + - type: dropdown + attributes: + label: Anyscale Terraform Module version + description: What version of the modules are you using? 
+ multiple: false + options: + - v0.1.x + validations: + required: true + - type: dropdown + attributes: + label: Terraform version + description: What version of Terraform are you using? + multiple: false + options: + - 1.6.x (Default) + - 1.5.x + - 1.4.x + - 1.3.x + - 1.2.x + - 1.1.x + - 1.0.x + - Other + validations: + required: true + - type: dropdown + attributes: + label: Is it blocking work? + description: Please choose one? + multiple: false + options: + - "no" + - "yes" + validations: + required: true + - type: textarea + attributes: + label: Additional Information + description: List any other information that is relevant to your issue. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..3ba13e0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml new file mode 100644 index 0000000..48f589d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -0,0 +1,54 @@ +name: Feature Request +description: Feature request for the Terraform Modules. +title: "[Feature]: " +labels: ["feature", "P1", "terraform-modules"] +assignees: ["brent-anyscale"] + +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to request a new feature. + Please make sure to search for existing issues before filing a new one. + - type: textarea + attributes: + label: What are you looking for? + description: Please provide a detailed explanation of the new feature request you are looking for. + placeholder: "Feature request!!" + validations: + required: true + - type: checkboxes + attributes: + label: Which module(s)? + description: "Please select which module(s) this issue impacts." 
+ options: + - label: Root Anyscale Module + - label: anyscale-k8s-configmap + - label: anyscale-k8s-helm + - label: anyscale-k8s-namespace + - type: dropdown + attributes: + label: Is it blocking work? + description: Please choose one? + multiple: false + options: + - "no" + - "yes" + validations: + required: true + - type: dropdown + attributes: + label: Terraform version + description: What version of Terraform are you using? + multiple: false + options: + - 1.6.x (Default) + - 1.5.x + - 1.4.x + - 1.3.x + - 1.2.x + - 1.1.x + - 1.0.x + - Other + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/security-issue.yml b/.github/ISSUE_TEMPLATE/security-issue.yml new file mode 100644 index 0000000..a18a084 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/security-issue.yml @@ -0,0 +1,28 @@ +name: Security Issue +description: General security issue +title: '[Security]: ' +labels: ["security", "P0", "terraform-modules"] +assignees: ["brent-anyscale"] + +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to report a security issue. + If this is a critical security issue that needs to be addressed immediately, email security-911@anyscale.com + - type: textarea + attributes: + label: What is the security issue? + description: Please provide a detailed explanation of the security issue. + value: "<>" + validations: + required: true + - type: checkboxes + attributes: + label: Which module(s)? + description: "Please select which module(s) this issue impacts." 
+ options: + - label: Root Anyscale Module + - label: anyscale-k8s-configmap + - label: anyscale-k8s-helm + - label: anyscale-k8s-namespace diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..6dfd542 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,26 @@ +## Pull request checklist + +Please check if your PR fulfills the following requirements: +- [ ] pre-commit has been run +- [ ] Tests for the changes have been added (for bug fixes / features) +- [ ] All tests passing +- [ ] Docs have been reviewed and added / updated if needed (for bug fixes / features) + +## Pull Request Type + +- [ ] Bugfix +- [ ] New feature +- [ ] Refactoring (no functional changes) +- [ ] Documentation change +- [ ] Other (please describe): + +## Does this introduce a breaking change? +- [ ] Yes +- [ ] No + + + + +## Other information + + diff --git a/.github/create-release.yml b/.github/create-release.yml new file mode 100644 index 0000000..2609224 --- /dev/null +++ b/.github/create-release.yml @@ -0,0 +1,56 @@ +name-template: "v$RESOLVED_VERSION" +tag-template: "v$RESOLVED_VERSION" +version-template: "$MAJOR.$MINOR.$PATCH" +version-resolver: + major: + labels: + - "major" + minor: + labels: + - "minor" + - "enhancement" + patch: + labels: + - "auto-update" + - "patch" + - "fix" + - "bugfix" + - "bug" + - "hotfix" + - "documentation" + - "examples" + - "unittests" + - "github" + default: "minor" + +categories: + - title: "🚀 Enhancements" + labels: + - "enhancement" + - "patch" + - title: "🐛 Bug Fixes" + labels: + - "fix" + - "bugfix" + - "bug" + - "hotfix" + - title: "📚 Documentation/Examples" + labels: + - "documentation" + - "examples" + - title: "Github Chores" + labels: + - "github" +# - title: 'Automatic Updates' +# labels: +# - 'auto-update' + +change-template: | +
+ $TITLE @$AUTHOR (#$NUMBER) + + $BODY +
+ +template: | + $CHANGES diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000..d5f78a3 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,20 @@ +# See https://github.com/actions/labeler for details +# "dependencies" label for any changes in dependency files +# Add 'repo' label to any root file changes +documentation: + - examples/**/* + - README.md + +examples: + - examples/**/* + +terraform: + - "*.tf" + - modules/**/* + +unittests: + - test/**/* + - modules/**/test/* + +github: + - .github/**/* diff --git a/.github/workflows/branch-cleanup.yml b/.github/workflows/branch-cleanup.yml new file mode 100644 index 0000000..8b103ba --- /dev/null +++ b/.github/workflows/branch-cleanup.yml @@ -0,0 +1,20 @@ +name: branch-cleanup +on: + pull_request: + branches: + - main +permissions: + contents: read + +jobs: + cleanup-branch: + name: Auto delete branch on merge + runs-on: ubuntu-latest + permissions: + # write permission is required to delete the merged branch + contents: write + steps: + - uses: cloudposse/actions/github/branch-cleanup@0.28.0 + env: + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + NO_BRANCH_DELETED_EXIT_CODE: 0 diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml new file mode 100644 index 0000000..2fc9c5d --- /dev/null +++ b/.github/workflows/create-release.yml @@ -0,0 +1,44 @@ +name: create-release + +on: + push: + # branches to consider in the event; optional, defaults to all + branches: + - main + # pull_request event is required only for autolabeler + pull_request: + # Only following types are handled by the action, but one can default to all as well + types: [closed] + # pull_request_target event is required for autolabeler to support PRs from forks + # pull_request_target: + # types: [opened, reopened, synchronize] + +permissions: + contents: read + +jobs: + create_release: + if: github.event.pull_request.merged == true + permissions: + # write permission is required to create
a github release + contents: write + # write permission is required for autolabeler + # otherwise, read permission is required at least + pull-requests: write + runs-on: ubuntu-latest + steps: + # Get PR from merged commit to main + - uses: actions-ecosystem/action-get-merged-pull-request@v1 + id: get-merged-pull-request + with: + github_token: "${{ secrets.GITHUB_TOKEN }}" + + # Drafts your next Release notes as Pull Requests are merged into "main" + - uses: release-drafter/release-drafter@v5 + # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml + with: + publish: ${{ !contains(steps.get-merged-pull-request.outputs.labels, 'no-release') }} + # prerelease: false + config-name: create-release.yml + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000..a143f90 --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,18 @@ +name: "Pull Request Labeler" +on: + - pull_request_target + +permissions: read-all + +jobs: + add-label: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@v4 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + configuration-path: ".github/labeler.yml" + sync-labels: true diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..a51aa79 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,23 @@ +name: CI/CD Pipeline + +on: + push: + branches: [ "main", "develop" ] + pull_request: + branches: [ "main" ] + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +permissions: read-all + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "unit-tests" + unit-tests: + runs-on: ubuntu-latest + name: Terraform Unit Tests + steps: + - name: Checkout + uses: 
actions/checkout@v3 + - name: Run unit tests + run: echo "Unit tests (not active)" diff --git a/.gitignore b/.gitignore index 2faf43d..12e6978 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Local .terraform directories **/.terraform/* +# Terraform lockfile +.terraform.lock.hcl + # .tfstate files *.tfstate *.tfstate.* @@ -10,8 +13,8 @@ crash.log crash.*.log # Exclude all .tfvars files, which are likely to contain sensitive data, such as -# password, private keys, and other secrets. These should not be part of version -# control as they are data points which are potentially sensitive and subject +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject # to change depending on the environment. *.tfvars *.tfvars.json @@ -23,9 +26,6 @@ override.tf.json *_override.tf *_override.tf.json -# Ignore transient lock info files created by terraform apply -.terraform.tfstate.lock.info - # Include override files you do wish to add to version control using negated pattern # !example_override.tf @@ -35,3 +35,7 @@ override.tf.json # Ignore CLI configuration files .terraformrc terraform.rc + +.DS_Store + +example-anyscale-cloud-register.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..8a1ca97 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,47 @@ +# .pre-commit-config.yaml +default_stages: [commit] +repos: + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.92.1 + hooks: + - id: terraform_fmt + - id: terraform_validate + args: + - --tf-init-args=-upgrade + - --hook-config=--retry-once-with-cleanup=true # requires jq - cleans up broken .terraform directories + # - id: terraform_tflint + # args: + # - --args=--config=__GIT_WORKING_DIR__/.tflint.hcl + - id: terraform_trivy + - id: terraform_checkov + args: + - --args=--quiet + - id: terraform_docs + args: + - --args=--config=.terraform-docs.yml 
+ # - id: tfupdate + # name: Autoupdate Terraform Helm Provider + # args: + # - --args=provider helm + # - --args=--version "~> 2.0" + # - id: tfupdate + # name: Autoupdate Terraform Kubernetes + # - --args=provider kubernetes + # - --args=--version "~> 2.0" + # - id: tfupdate + # name: Autoupdate Terraform AWS Provider + # - --args=provider aws + # - --args=--version "~> 5.0" + # - id: tfupdate + # name: Autoupdate Terraform Google Provider + # - --args=provider google + # - --args=--version "~> 5.0" + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: detect-private-key + - id: check-merge-conflict diff --git a/.terraform-docs.yml b/.terraform-docs.yml new file mode 100644 index 0000000..8248c1a --- /dev/null +++ b/.terraform-docs.yml @@ -0,0 +1,4 @@ +formatter: md +sort: + enabled: true + by: required diff --git a/.tflint.hcl b/.tflint.hcl new file mode 100644 index 0000000..3ecf17e --- /dev/null +++ b/.tflint.hcl @@ -0,0 +1,69 @@ +plugin "aws" { + enabled = true + version = "0.30.0" + source = "github.com/terraform-linters/tflint-ruleset-aws" +} + +plugin "google" { + enabled = true + version = "0.30.0" + source = "github.com/terraform-linters/tflint-ruleset-google" +} + +config { + call_module_type = "local" + force = false +} + +rule "terraform_required_providers" { + enabled = true +} + +rule "terraform_required_version" { + enabled = true +} + +rule "terraform_naming_convention" { + enabled = true + format = "snake_case" +} + +rule "terraform_typed_variables" { + enabled = true +} + +rule "terraform_unused_declarations" { + enabled = true +} + +rule "terraform_comment_syntax" { + enabled = true +} + +rule "terraform_deprecated_index" { + enabled = true +} + +rule "terraform_deprecated_interpolation" { + enabled = true +} + +rule "terraform_documented_outputs" { + enabled = true +} + +rule 
"terraform_documented_variables" { + enabled = true +} + +rule "terraform_module_pinned_source" { + enabled = true +} + +rule "terraform_standard_module_structure" { + enabled = true +} + +rule "terraform_workspace_remote" { + enabled = true +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5e4350e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,10 @@ +## 0.1.0 (Released) +FEATURES: +- Initial Kubernetes Anyscale Terraform Module release + +BUG FIXES: + +BREAKING CHANGES: + +NOTES: +- Currently only tested with AWS EKS. Examples and testing for GCP GKE still to be completed. diff --git a/README.md b/README.md index 1ee3790..6bb30e9 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,87 @@ -# terraform-kubernetes-anyscale-foundation-modules -Anyscale Kubernetes Foundation Modules +[![Build Status][badge-build]][build-status] +[![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases) +[![Kubernetes Provider Version][badge-tf-kubernetes]](https://github.com/terraform-providers/terraform-provider-kubernetes/releases) + +# Terraform Modules for Anyscale Kubernetes Foundations +[Terraform] modules to manage Kubernetes infrastructure for Anyscale. This builds the foundational cloud resources needed to run Anyscale on Kubernetes and should be paired with the [Anyscale AWS]() and [Anyscale GCP]() Terraform Modules. + +**THIS IS PROVIDED AS A STARTING POINT** + +**USE AT YOUR OWN RISK** + +## Kubernetes Resources + +To streamline long-term management and to enable customization, we've modularized the resources into the following Terraform sub-modules: +* anyscale-k8s-helm - Required Helm Charts for Anyscale on Kubernetes +* anyscale-k8s-configmap - A sample configmap for Anyscale. This can also be managed via the Anyscale Kubernetes Manager Helm Chart. +* anyscale-k8s-namespace - The Namespace for Anyscale. This can also be managed via the Anyscale Kubernetes Manager Helm Chart. 
+ +### Customization + +These modules are designed with best practices in mind, ensuring a secure, efficient, and scalable Anyscale deployment. Each module is standalone, allowing you the flexibility to disable any you don't need. This is handy if you're looking to incorporate custom solutions for specific resources. + + +### Examples +The examples folder has a couple of common use cases that have been tested. These include: +* Anyscale - AWS & EKS + * [Build everything - use a common name for all resources, public networking](./examples/aws/eks-public) + * [Build everything - use a common name for all resources, private networking](./examples/aws/eks-private) **Not fully tested** +* Anyscale - GCP & GKE + * [Build everything - use a common name for all resources, public networking](./examples/gcp/gke-public/) **Not Started** + * [Build everything - use a common name for all resources, private networking](./examples/gcp/gke-private/) **Not Started** + +Additional examples can be requested via an [issues] ticket. + +### Specific Module Notes + + +## Reporting Issues + +We use GitHub [Issues] to track community reported issues and missing features. + +## Known Issues/Untested + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0 | +| [helm](#requirement\_helm) | ~> 2.0 | +| [kubernetes](#requirement\_kubernetes) | ~> 2.0 | +| [time](#requirement\_time) | >= 0.12 | + +## Providers + +No providers. + +## Modules + +No modules. + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [cloud\_provider](#input\_cloud\_provider) | (Required) The cloud provider (aws or gcp)

ex:
cloud_provider = "aws"
| `string` | n/a | yes | +| [aws\_controlplane\_role\_arn](#input\_aws\_controlplane\_role\_arn) | (Optional) The ARN of the AWS IAM role that will be used by the EKS cluster to access AWS services.

Required if `cloud_provider` is set to `aws`.

ex:
aws_controlplane_role_arn = "arn:aws:iam::123456789012:role/my-eks-controlplane-role"
| `string` | `null` | no | +| [aws\_dataplane\_role\_arn](#input\_aws\_dataplane\_role\_arn) | (Optional) The ARN of the AWS IAM role that will be used by the EKS cluster to access AWS services.

Required if `cloud_provider` is set to `aws`.

ex:
aws_dataplane_role_arn = "arn:aws:iam::123456789012:role/my-eks-dataplane-role"
| `string` | `null` | no | +| [kubernetes\_cluster\_name](#input\_kubernetes\_cluster\_name) | (Optional) The name of the Kubernetes cluster.

ex:
kubernetes_cluster_name = "my-cluster"
| `string` | `null` | no | + +## Outputs + +No outputs. + + + +[Terraform]: https://www.terraform.io +[Anyscale]: https://www.anyscale.com +[Issues]: https://github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules/issues +[badge-build]: https://github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules/workflows/CI/CD%20Pipeline/badge.svg +[badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform +[badge-tf-kubernetes]: https://img.shields.io/badge/KUBERNETES-2.+-F8991D.svg?logo=terraform +[build-status]: https://github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules/actions diff --git a/anyscale-k8s-helm/data.tf b/anyscale-k8s-helm/data.tf deleted file mode 100644 index 8b13789..0000000 --- a/anyscale-k8s-helm/data.tf +++ /dev/null @@ -1 +0,0 @@ - diff --git a/anyscale-k8s-helm/helm-ingress.tf b/anyscale-k8s-helm/helm-ingress.tf deleted file mode 100644 index 590b3d8..0000000 --- a/anyscale-k8s-helm/helm-ingress.tf +++ /dev/null @@ -1,29 +0,0 @@ -resource "helm_release" "ingress" { - count = local.module_enabled ? 1 : 0 - - name = "anyscale-ingress" - repository = "https://kubernetes.github.io/ingress-nginx" - chart = "ingress-nginx" - namespace = var.ingress_namespace - version = "4.11.1" - create_namespace = true - wait = true - set { - name = "controller.service.type" - value = "LoadBalancer" - } - set { - name = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/aws-load-balancer-type" - value = "nlb" - } - - set { - name = "controller.allowSnippetAnnotations" - value = true - } - - set { - name = "controller.autoscaling.enabled" - value = true - } -} diff --git a/anyscale-k8s-helm/helm-nvidia.tf b/anyscale-k8s-helm/helm-nvidia.tf deleted file mode 100644 index be7375d..0000000 --- a/anyscale-k8s-helm/helm-nvidia.tf +++ /dev/null @@ -1,15 +0,0 @@ -resource "helm_release" "nvidia" { - count = local.module_enabled ? 
1 : 0 - name = "nvidia-device-plugin" - repository = "https://nvidia.github.io/k8s-device-plugin" - chart = "nvidia-device-plugin" - namespace = "nvidia-device-plugin" - create_namespace = true - version = "0.16.2" - - // https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#deploying-with-gpu-feature-discovery-for-automatic-node-labels - set { - name = "gfd.enabled" - value = true - } -} diff --git a/anyscale-k8s-helm/main.tf b/anyscale-k8s-helm/main.tf deleted file mode 100644 index 5831d92..0000000 --- a/anyscale-k8s-helm/main.tf +++ /dev/null @@ -1,24 +0,0 @@ -locals { - module_enabled = var.module_enabled -} - -# AWS Data Sources -data "aws_caller_identity" "current" { - count = var.cloud_provider == "aws" ? 1 : 0 -} -data "aws_region" "current" { - count = var.cloud_provider == "aws" ? 1 : 0 -} - -# GCP Data Sources -data "google_client_config" "current" { - count = var.cloud_provider == "gcp" ? 1 : 0 -} - -data "kubernetes_service" "ingress" { - count = local.module_enabled ? 
1 : 0 - metadata { - name = "${helm_release.ingress[0].name}-${helm_release.ingress[0].chart}-controller" - namespace = var.ingress_namespace - } -} diff --git a/anyscale-k8s-helm/outputs.tf b/anyscale-k8s-helm/outputs.tf deleted file mode 100644 index b08d7c6..0000000 --- a/anyscale-k8s-helm/outputs.tf +++ /dev/null @@ -1,19 +0,0 @@ -output "lb_hostnames" { - value = try(data.kubernetes_service.ingress[0].status.0.load_balancer.0.ingress.*.hostname, []) -} - -output "lb_ips" { - value = try(data.kubernetes_service.ingress[0].status.0.load_balancer.0.ingress.*.ip, []) -} - -output "helm_ingress_status" { - value = try(helm_release.ingress[0].status, "") -} - -output "helm_nvidia_status" { - value = try(helm_release.nvidia[0].status, "") -} - -output "helm_autoscaler_status" { - value = try(helm_release.anyscale_cluster_autoscaler[0].status, "") -} diff --git a/anyscale-k8s-helm/test/anyscale-aws-test/.terraform.lock.hcl b/anyscale-k8s-helm/test/anyscale-aws-test/.terraform.lock.hcl deleted file mode 100644 index db5c4db..0000000 --- a/anyscale-k8s-helm/test/anyscale-aws-test/.terraform.lock.hcl +++ /dev/null @@ -1,104 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. 
- -provider "registry.terraform.io/hashicorp/aws" { - version = "5.62.0" - constraints = "~> 5.0" - hashes = [ - "h1:X3LAZdkVhb/77gTlhPwKYCA9oblBCSu866fZDDOojPY=", - "zh:1f366cbcda72fb123015439a42ab19f96e10ce4edb404273f4e1b7e06da20b73", - "zh:25f098454a34b483279e0382b24b4f42e51c067222c6e797eda5d3ec33b9beb1", - "zh:4b59d48b527e3cefd73f196853bfc265b3e1e57b55c1c8a2d12ff6e3534b4f07", - "zh:7bb88c1ca95e2b3f0f1fe8636925133b9813fc5b137cc467ba6a233ddf4b360e", - "zh:8a93dece40e816c92647e762839d0370e9cad2aa21dc4ca95baee9385f116459", - "zh:8dfe82c55ab8f633c1e2a39c687e9ca8c892d1c2005bf5166ac396ce868ecd05", - "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:a754952d69b4860480d5207390e3ab42350c964dbca9a5ac0c6912dd24b4c11d", - "zh:b2a4dbf4abee0e9ec18c5d323b99defdcd3c681f8c4306fb6e02cff7de038f85", - "zh:b57d84be258b571c04271015f03858ab215768b82e47c11ecd86e789d577030a", - "zh:be811b03289407c8d59e6b199bf16e6071165565ffe502148172d0886cf849c4", - "zh:d4144c7366c840eff1ac15ba13d96063f798f0983d24053a832362033624fe6f", - "zh:d88612856d453c4e10c49c76e4ef522b7d068b4f7c3e2e0b03dd74540986eecd", - "zh:e8bd231a5d0786cc4aab8471bb6dabd5a5df1c598afda077a9f27987ada57b67", - "zh:ffb40a66b4d000a8ee4c54227eeb998f887ad867419c3af7d3981587788de074", - ] -} - -provider "registry.terraform.io/hashicorp/google" { - version = "5.40.0" - hashes = [ - "h1:3GmqYKo7bbbedWdAUTgEogezyx5SNfMfpbXOKffSxSg=", - "zh:441627704233ee27f67d8fa73466a4e0d4097f0f304ad994088bb7947c0d035d", - "zh:6bc65aaf6730a9d1ca597e47054fc07f9cad55d31a66a327818a3b4ffe656700", - "zh:882fb575f5e3f1e2968764569b373932bba789983075bd57cb464f4d2ac52b80", - "zh:a8b1bc92e0f2be329ac3b57cefd735850b1786f1b861b5db1a00d52bc57df6c3", - "zh:b8d446bdf44ca8ce0589b8c7fa04e3f2295ccf61fc05181da9693e142051e355", - "zh:c0a6ce0a02b03bf41259211030046e7f04706f25116e746279bbbbf5e8f10cdb", - "zh:c653ce4ec1e99b16bfc59e5b44b50a1b7b004bbbba98c8ddeb8e8db9860f77d5", - "zh:d2c841cad79edd48f08f4871327dc3664a0b4ce51a92fcaa85c13884523b1475", - 
"zh:d644d066989e40b9512d4c2d6d2a1a24075b0b3086dbe781127b8f3fb571be73", - "zh:d95b6ee54e9ee1f8bd22a47814f65222edbcecf5393a6aff598ef081f0f9b172", - "zh:dfc8bd95f1aac132fe136d7e13a963551b3853ce40e34425c290b6f17c5e14aa", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} - -provider "registry.terraform.io/hashicorp/helm" { - version = "2.14.1" - constraints = "~> 2.14" - hashes = [ - "h1:JpDnPg+/pqynL54+Og9/bORTRYP1NxFjbrm65827Cns=", - "zh:0b8190016b101edbec158f869e14e5bcb9708dc88040e3d0119f6bf0a0384fa6", - "zh:0bd483d0193716ee7f30ce2e25eebb463aa51700c716842e25026bf2167e8feb", - "zh:5c8c16640f84f952e7ed1bab43b91c65f97168dd3bc189ea368e07fd40d44037", - "zh:67729452ff9c4f7a32d2e0008ce5deb86293929704ed3219971595db757924fa", - "zh:72dd1bc749de240e3700623ab1ff9b490ad5bbf17338e02d30b13a04a3b3c4ef", - "zh:7dcaec73d82c61f4bf315a5074217c6a8c1f774955a7b6f80c943a8907067a6f", - "zh:a48e27fbd17112e4f29d67d0467a8ea1ca554f98bf1f0748f1ebbc61355c465e", - "zh:b6283654f06d6ac5e0d67b0807c348fe5a700febf18f4990bf965705b379e29e", - "zh:dee35c1a536364431b9a6e022a9f89e2942425ca7111edd1ea89d596d68ee4e7", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - "zh:f5dd0141145104c681620d470093bd16bf3e4833021907581317c0b4ed650f8d", - "zh:f7fe46792e37d918e14740fb562b92a6d1594d60a43cc6b944a23a32930a2b16", - ] -} - -provider "registry.terraform.io/hashicorp/kubernetes" { - version = "2.31.0" - constraints = "2.31.0" - hashes = [ - "h1:ZlKkkHJrjF4AiMueI2yA+abBc1c37cfwjyxURdLKhEw=", - "zh:0d16b861edb2c021b3e9d759b8911ce4cf6d531320e5dc9457e2ea64d8c54ecd", - "zh:1bad69ed535a5f32dec70561eb481c432273b81045d788eb8b37f2e4a322cc40", - "zh:43c58e3912fcd5bb346b5cb89f31061508a9be3ca7dd4cd8169c066203bcdfb3", - "zh:4778123da9206918a92dfa73cc711475d2b9a8275ff25c13a30513c523ac9660", - "zh:8bfa67d2db03b3bfae62beebe6fb961aee8d91b7a766efdfe4d337b33dfd23dd", - "zh:9020bb5729db59a520ade5e24984b737e65f8b81751fbbd343926f6d44d22176", - 
"zh:90431dbfc5b92498bfbce38f0b989978c84421a6c33245b97788a46b563fbd6e", - "zh:b71a061dda1244f6a52500e703a9524b851e7b11bbf238c17bbd282f27d51cb2", - "zh:d6232a7651b834b89591b94bf4446050119dcde740247e6083a4d55a2cefd28a", - "zh:d89fba43e699e28e2b5e92fff2f75fc03dbc8de0df9dacefe1a8836f8f430753", - "zh:ef85c0b744f5ba1b10dadc3c11e331ba4225c45bb733e024d7218c24b02b0512", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} - -provider "registry.terraform.io/hashicorp/time" { - version = "0.12.0" - constraints = ">= 0.9.0" - hashes = [ - "h1:Os2Ok7txtlUJHh6Hg7o+74Ql85SnRb/fGmah22yXpLw=", - "zh:019a4c09af254ef80b72cf0d843dfe72d99483e227138cf5b514a1b9977ab4c3", - "zh:0ae310ec740ebc6f275529507d60bb747d0bf39e72fc5a2fa90d74486006132c", - "zh:13d6aec117f05237fbf8c7d91d6ebb19797b00aa87e7a812642d3ea4738a394e", - "zh:2e87abbc261f9317d0c2ef26e01d5fabf77679da7d2cac6f47df7d198f720989", - "zh:4a6d471176ce0264455aa7d5457b8702f78400010c201c1719708958a1b7b647", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:8978d5474a6da30bc0ad21c17db188d6918cacf3df3f6506b72ef3a268d53e2e", - "zh:b109efe138dfcb45dc04a9cc6809d185ab8b0ebc12040847c2dac430fda5af68", - "zh:b58e039b9106ac0a8de3c07f53b5279d7f0215fb35f2d23df642dfce0875382f", - "zh:ba2cbb2e515922d13efe3a46647be84f5426fcfcaa0f1520b3efeab8db847ed3", - "zh:c6c1ef1f26f25bca3abb5e07fa33dca37ed39cc26d0ff877964f2ffe5edd618c", - "zh:f8e171f923b7d2e789abd034072465dec3e6133c3a7644b7a7a965a74d52224e", - ] -} diff --git a/anyscale-k8s-helm/test/eksctltest.yaml b/anyscale-k8s-helm/test/eksctltest.yaml deleted file mode 100644 index 4deba74..0000000 --- a/anyscale-k8s-helm/test/eksctltest.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: eksctl.io/v1alpha5 -kind: ClusterConfig - -metadata: - name: web-quickstart - region: us-east-2 - -managedNodeGroups: - - name: eks-mng - instanceType: t3.medium - desiredCapacity: 2 - -iam: - withOIDC: true - serviceAccounts: - - metadata: - name: 
aws-load-balancer-controller - namespace: kube-system - wellKnownPolicies: - awsLoadBalancerController: true - -addons: - - name: aws-ebs-csi-driver - wellKnownPolicies: # Adds an IAM service account - ebsCSIController: true - -cloudWatch: - clusterLogging: - enableTypes: ["*"] - logRetentionInDays: 30 \ No newline at end of file diff --git a/anyscale-k8s-helm/variables.tf b/anyscale-k8s-helm/variables.tf deleted file mode 100644 index 30d6964..0000000 --- a/anyscale-k8s-helm/variables.tf +++ /dev/null @@ -1,130 +0,0 @@ -# ------------------------------------------------------------------------------ -# REQUIRED PARAMETERS -# These variables must be set when using this module. -# ------------------------------------------------------------------------------ -variable "cloud_provider" { - description = <<-EOT - (Required) The cloud provider (aws or gcp) - - ex: - ``` - cloud_provider = "aws" - ``` - EOT - type = string - validation { - condition = ( - var.cloud_provider == "aws" || var.cloud_provider == "gcp" - ) - error_message = "The cloud_provider only allows `aws` or `gcp`" - } -} - -# ------------------------------------------------------------------------------ -# OPTIONAL PARAMETERS -# These variables have defaults, but may be overridden. -# ------------------------------------------------------------------------------ -variable "module_enabled" { - description = <<-EOT - (Optional) Determines if this module should create resources. - - If set to true, `eks_role_arn`, `anyscale_subnet_ids`, and `anyscale_security_group_id` must be provided. - ex: - ``` - module_enabled = true - ``` - EOT - type = bool - default = false -} - -variable "ingress_namespace" { - type = string - description = <<-EOT - (Optional) Namespace to place the ingress-nginx chart into. - - ex: - ``` - ingress_namespace = "ingress-nginx" - ``` - EOT - default = "ingress-nginx" -} - -variable "kubernetes_cluster_name" { - description = <<-EOT - (Optional) The name of the Kubernetes cluster. 
- - ex: - ``` - kubernetes_cluster_name = "anyscale-cluster" - ``` - EOT - type = string - default = null -} - -variable "kubernetes_endpoint_address" { - description = <<-EOT - (Optional) The address of the Kubernetes API server. - - ex: - ``` - kubernetes_endpoint_address = "https://anyscale-cluster.eks.us-east-2.amazonaws.com" - ``` - EOT - type = string - default = null -} - -variable "kubernetes_cluster_ca_data" { - description = <<-EOT - (Optional) The base64 encoded certificate data required to communicate with the Kubernetes cluster. - - ex: - ``` - kubernetes_cluster_ca_data = "LS0txxxxx" - ``` - EOT - type = string - default = null -} - -# ------------------------------------------------------------------------------ -# Helm Chart Variables -# ------------------------------------------------------------------------------ -variable "anyscale_cluster_autoscaler_chart" { - description = <<-EOT - (Optional) The Helm chart to install the Cluster Autoscaler. - - ex: - ``` - anyscale_cluster_autoscaler_chart = { - name = "cluster-autoscaler" - respository = "https://kubernetes.github.io/autoscaler" - chart = "cluster-autoscaler" - chart_version = "9.37.0" - namespace = "kube-system" - values = { - "some.other.config" = "value" - } - } - ``` - EOT - type = object({ - name = string - repository = string - chart = string - chart_version = string - namespace = string - values = map(string) - }) - default = { - name = "anyscale-cluster-autoscaler" - repository = "https://kubernetes.github.io/autoscaler" - chart = "cluster-autoscaler" - chart_version = "9.37.0" - namespace = "kube-system" - values = {} - } -} diff --git a/examples/aws/eks-private/README.md b/examples/aws/eks-private/README.md new file mode 100644 index 0000000..08f7900 --- /dev/null +++ b/examples/aws/eks-private/README.md @@ -0,0 +1,69 @@ +[![Build Status][badge-build]][build-status] +[![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases) +[![AWS Provider 
Version][badge-tf-aws]](https://github.com/terraform-providers/terraform-provider-aws/releases) + +# Anyscale AWS EKS Example - Private Networking +This example creates the resources to run Anyscale on AWS EKS with fully private networking. + +**NOTE** +Not fully tested! Known to need some additional work. + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0 | +| [aws](#requirement\_aws) | ~> 5.0 | +| [helm](#requirement\_helm) | ~> 2.0 | +| [kubernetes](#requirement\_kubernetes) | ~> 2.0 | + +## Providers + +No providers. + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [anyscale\_efs](#module\_anyscale\_efs) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-efs | n/a | +| [anyscale\_eks\_cluster](#module\_anyscale\_eks\_cluster) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-cluster | n/a | +| [anyscale\_eks\_nodegroups](#module\_anyscale\_eks\_nodegroups) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-nodegroups | n/a | +| [anyscale\_iam\_roles](#module\_anyscale\_iam\_roles) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-iam | n/a | +| [anyscale\_k8s\_configmap](#module\_anyscale\_k8s\_configmap) | ../../../modules/anyscale-k8s-configmap | n/a | +| [anyscale\_k8s\_helm](#module\_anyscale\_k8s\_helm) | ../../../modules/anyscale-k8s-helm | n/a | +| [anyscale\_k8s\_namespace](#module\_anyscale\_k8s\_namespace) | ../../../modules/anyscale-k8s-namespace | n/a | +| [anyscale\_s3](#module\_anyscale\_s3) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-s3 | n/a | +| [anyscale\_securitygroup](#module\_anyscale\_securitygroup) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-securitygroups | n/a | +| 
[anyscale\_vpc](#module\_anyscale\_vpc) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-vpc | n/a | + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [anyscale\_cloud\_id](#input\_anyscale\_cloud\_id) | (Optional) Anyscale Cloud ID. Default is `null`. | `string` | `null` | no | +| [anyscale\_deploy\_env](#input\_anyscale\_deploy\_env) | (Optional) Anyscale deploy environment. Used in resource names and tags. | `string` | `"production"` | no | +| [anyscale\_s3\_cors\_rule](#input\_anyscale\_s3\_cors\_rule) | (Optional) A map of CORS rules for the S3 bucket.

Included here so it can be overridden for Anyscale Staging. | `map(any)` |
{
"allowed_headers": [
"*"
],
"allowed_methods": [
"GET",
"POST",
"PUT",
"HEAD",
"DELETE"
],
"allowed_origins": [
"https://*.anyscale.com"
],
"expose_headers": []
}
| no | +| [anyscale\_trusted\_role\_arns](#input\_anyscale\_trusted\_role\_arns) | (Optional) A list of ARNs of IAM roles that are trusted by the Anyscale IAM role.

Included here so it can be overridden for Anyscale Staging. | `list(string)` | `[]` | no | +| [aws\_region](#input\_aws\_region) | The AWS region in which all resources will be created. | `string` | `"us-east-2"` | no | +| [tags](#input\_tags) | (Optional) A map of tags to apply to all resources that accept tags. | `map(string)` |
{
"environment": "example",
"example": "aws/eks-private",
"repo": "terraform-kubernetes-anyscale-foundation-modules",
"test": true
}
| no | + +## Outputs + +| Name | Description | +|------|-------------| +| [anyscale\_register\_command](#output\_anyscale\_register\_command) | Anyscale register command.
This output can be used with the Anyscale CLI to register a new Anyscale Cloud.
You will need to replace `` with a name of your choosing before running the Anyscale CLI command. | +| [eks\_cluster\_name](#output\_eks\_cluster\_name) | The name of the EKS cluster. | + + + +[Terraform]: https://www.terraform.io +[Issues]: https://github.com/anyscale/sa-sandbox-terraform/issues +[badge-build]: https://github.com/anyscale/sa-sandbox-terraform/workflows/CI/CD%20Pipeline/badge.svg +[badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform +[badge-tf-aws]: https://img.shields.io/badge/AWS-5.+-F8991D.svg?logo=terraform +[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions diff --git a/examples/aws/eks-private/main.tf b/examples/aws/eks-private/main.tf new file mode 100644 index 0000000..d2ff64d --- /dev/null +++ b/examples/aws/eks-private/main.tf @@ -0,0 +1,456 @@ +# --------------------------------------------------------------------------------------------------------------------- +# Example Anyscale K8s Resources +# This template creates EKS resources for Anyscale +# It creates: +# - VPC +# - Security Group +# - S3 Bucket +# - IAM Roles +# - EKS Cluster +# - EKS Nodegroups +# - Helm Charts +# --------------------------------------------------------------------------------------------------------------------- +locals { + # azs = slice(data.aws_availability_zones.available.names, 0, 3) + + full_tags = merge(tomap({ + anyscale-cloud-id = var.anyscale_cloud_id, + anyscale-deploy-environment = var.anyscale_deploy_env + }), + var.tags + ) +} + +locals { + public_subnets = ["172.24.101.0/24", "172.24.102.0/24", "172.24.103.0/24"] + private_subnets = ["172.24.20.0/24", "172.24.21.0/24", "172.24.22.0/24"] +} +module "anyscale_vpc" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-vpc" + + 
anyscale_vpc_name = "anyscale-eks-private" + cidr_block = "172.24.0.0/16" + + public_subnets = local.public_subnets + private_subnets = local.private_subnets +} +locals { + # Because subnet ID may not be known at plan time, we cannot use it as a key + anyscale_subnet_count = length(local.private_subnets) +} + +module "anyscale_securitygroup" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-securitygroups" + + vpc_id = module.anyscale_vpc.vpc_id + + security_group_name_prefix = "anyscale-eks-private-" + + ingress_with_self = [ + { rule = "all-all" } + ] +} + +module "anyscale_s3" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-s3" + + module_enabled = true + + anyscale_bucket_name = "anyscale-eks-private-${var.aws_region}" + force_destroy = true + cors_rule = var.anyscale_s3_cors_rule + + tags = local.full_tags +} + + +module "anyscale_efs" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-efs" + + module_enabled = true + + anyscale_efs_name = "anyscale-eks-private-efs" + mount_targets_subnet_count = local.anyscale_subnet_count + mount_targets_subnets = module.anyscale_vpc.private_subnet_ids + associated_security_group_ids = [module.anyscale_securitygroup.security_group_id] + + tags = local.full_tags +} + +#trivy:ignore:avd-aws-0342 trivy:ignore:avd-aws-0342 +module "anyscale_iam_roles" { + #checkov:skip=CKV_TF_1: 
Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-iam" + + module_enabled = true + + create_anyscale_access_role = true + anyscale_trusted_role_arns = var.anyscale_trusted_role_arns + create_cluster_node_instance_profile = false + + create_iam_s3_policy = true + anyscale_s3_bucket_arn = module.anyscale_s3.s3_bucket_arn + + create_anyscale_eks_cluster_role = true + anyscale_eks_cluster_role_name = "anyscale-eks-private-cluster-role" + + create_anyscale_eks_node_role = true + anyscale_eks_node_role_name = "anyscale-eks-private-node-role" + anyscale_eks_cluster_name = module.anyscale_eks_cluster.eks_cluster_name + + create_eks_ebs_csi_driver_role = true + eks_ebs_csi_role_name = "anyscale-eks-private-ebs-csi-role" + anyscale_eks_cluster_oidc_arn = module.anyscale_eks_cluster.eks_cluster_oidc_provider_arn + anyscale_eks_cluster_oidc_url = module.anyscale_eks_cluster.eks_cluster_oidc_provider_url + + create_eks_efs_csi_driver_role = false + eks_efs_csi_role_name = "anyscale-eks-private-efs-csi-role" + efs_file_system_arn = module.anyscale_efs.efs_arn + + tags = local.full_tags +} + +locals { + coredns_config = jsonencode({ + affinity = { + nodeAffinity = { + requiredDuringSchedulingIgnoredDuringExecution = { + nodeSelectorTerms = [ + { + matchExpressions = [ + { + key = "node-type" + operator = "In" + values = ["management"] + } + ] + } + ] + } + } + }, + nodeSelector = { + "node-type" = "management" + }, + tolerations = [ + { + key = "CriticalAddonsOnly" + operator = "Exists" + }, + { + effect = "NoSchedule" + key = "node-role.kubernetes.io/control-plane" + } + ], + replicaCount = 2 + }) + +} + +module "anyscale_eks_cluster" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of 
the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-cluster" + + module_enabled = true + + anyscale_subnet_ids = module.anyscale_vpc.private_subnet_ids + anyscale_subnet_count = local.anyscale_subnet_count + anyscale_security_group_id = module.anyscale_securitygroup.security_group_id + eks_role_arn = module.anyscale_iam_roles.iam_anyscale_eks_cluster_role_arn + anyscale_eks_name = "anyscale-eks-private" + + enabled_cluster_log_types = ["api", "authenticator", "audit", "scheduler", "controllerManager"] + + eks_addons = [ + { + addon_name = "coredns" + addon_version = "v1.11.1-eksbuild.8" + configuration_values = local.coredns_config + }, + # Add EBS volume support for EKS + { + addon_name = "aws-ebs-csi-driver" + addon_version = "v1.33.0-eksbuild.1" + service_account_role_arn = module.anyscale_iam_roles.iam_anyscale_eks_ebs_csi_driver_role_arn + } + ] + eks_addons_depends_on = module.anyscale_eks_nodegroups + + tags = local.full_tags + + depends_on = [module.anyscale_vpc, module.anyscale_securitygroup] +} + +module "anyscale_eks_nodegroups" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-nodegroups" + + module_enabled = true + + anyscale_security_group_id = module.anyscale_securitygroup.security_group_id + kubernetes_security_group_id = module.anyscale_eks_cluster.cluster_managed_security_group_id + launch_template_name = "anyscale-eks-private-launch-template" + + create_eks_management_node_group = true # Used just to have pods that are available for management helm charts, not for Anyscale resources + + eks_node_role_arn = module.anyscale_iam_roles.iam_anyscale_eks_node_role_arn + eks_cluster_name = module.anyscale_eks_cluster.eks_cluster_name + subnet_ids = 
module.anyscale_vpc.private_subnet_ids + + tags = local.full_tags + + eks_anyscale_node_groups = [ + { + name = "anyscale-ondemand-cpu-8CPU-32GB" + instance_types = [ + "m6a.2xlarge", + "m5a.2xlarge", + "m6i.2xlarge", + "m5.2xlarge" + ] + capacity_type = "ON_DEMAND" + ami_type = "AL2_x86_64_GPU" + tags = {} + scaling_config = { + desired_size = 1 # Settng to 1 to prime the autoscaler cache with the instance types and GPU availability + max_size = 50 + min_size = 0 + } + taints = [ + { + key = "node.anyscale.com/capacity-type", + value = "ANY", + effect = "NO_SCHEDULE", + } + ] + }, + + { + name = "anyscale-ondemand-cpu-16CPU-64GB" + instance_types = [ + "m6a.4xlarge", + "m5a.4xlarge", + "m6i.4xlarge", + "m5.4xlarge", + ] + capacity_type = "ON_DEMAND" + ami_type = "AL2_x86_64_GPU" + tags = {} + scaling_config = { + desired_size = 1 # Settng to 1 to prime the autoscaler cache with the instance types and GPU availability + max_size = 50 + min_size = 0 + } + taints = [ + { + key = "node.anyscale.com/capacity-type", + value = "ANY", + effect = "NO_SCHEDULE", + } + ] + }, + + { + name = "anyscale-spot-cpu-16CPU-64GB" + instance_types = [ + "m6a.4xlarge", + "m5a.4xlarge", + "m6i.4xlarge", + "m5.4xlarge", + ] + capacity_type = "SPOT" + ami_type = "AL2_x86_64_GPU" + tags = {} + scaling_config = { + desired_size = 0 + max_size = 50 + min_size = 0 + } + taints = [ + { + key = "node.anyscale.com/capacity-type", + value = "SPOT", + effect = "NO_SCHEDULE", + } + ] + }, + + { + name = "anyscale-ondemand-gpu-16CPU-64GB-1xT4" + instance_types = [ + "g4dn.4xlarge" + ] + capacity_type = "ON_DEMAND" + ami_type = "AL2_x86_64_GPU" + # Setting the following as labels so the Autoscaler knows where to look for GPU availability + labels = {} + tags = {} + scaling_config = { + desired_size = 0 + max_size = 50 + min_size = 0 + } + taints = [ + { + key = "nvidia.com/gpu", + value = "present", + effect = "NO_SCHEDULE", + }, + { + key = "node.anyscale.com/capacity-type", + value = "ANY", + 
effect = "NO_SCHEDULE", + }, + { + key = "node.anyscale.com/accelerator-type", + value = "GPU", + effect = "NO_SCHEDULE", + } + ] + }, + { + name = "anyscale-ondemand-gpu-16CPU-64GB-1xA10G" + instance_types = [ + "g5.4xlarge" + ] + capacity_type = "ON_DEMAND" + ami_type = "AL2_x86_64_GPU" + # Setting the following as labels so the Autoscaler knows where to look for GPU availability + labels = { + "nvidia.com/gpu.product" = "NVIDIA-A10G" + "nvidia.com/gpu.count" = "1" + } + tags = {} + scaling_config = { + desired_size = 0 + max_size = 50 + min_size = 0 + } + taints = [ + { + key = "nvidia.com/gpu", + value = "present", + effect = "NO_SCHEDULE", + }, + { + key = "node.anyscale.com/capacity-type", + value = "ANY", + effect = "NO_SCHEDULE", + }, + { + key = "node.anyscale.com/accelerator-type", + value = "GPU", + effect = "NO_SCHEDULE", + } + ] + }, + + { + name = "anyscale-spot-gpu-16CPU-64GB-1xA10G" + instance_types = [ + "g5.4xlarge" + ] + capacity_type = "SPOT" + ami_type = "AL2_x86_64_GPU" + # Setting the following as labels so the Autoscaler knows where to look for GPU availability + labels = { + "nvidia.com/gpu.product" = "NVIDIA-A10G" + "nvidia.com/gpu.count" = "1" + } + tags = {} + scaling_config = { + desired_size = 0 + max_size = 50 + min_size = 0 + } + taints = [ + { + key = "nvidia.com/gpu", + value = "present", + effect = "NO_SCHEDULE", + }, + { + key = "node.anyscale.com/capacity-type", + value = "ANY", + effect = "NO_SCHEDULE", + }, + { + key = "node.anyscale.com/accelerator-type", + value = "GPU", + effect = "NO_SCHEDULE", + } + ] + } + ] +} + +module "anyscale_k8s_helm" { + source = "../../../modules/anyscale-k8s-helm" + + module_enabled = true + cloud_provider = "aws" + + kubernetes_cluster_name = module.anyscale_eks_cluster.eks_cluster_name + + depends_on = [module.anyscale_eks_nodegroups] +} + +module "anyscale_k8s_namespace" { + source = "../../../modules/anyscale-k8s-namespace" + + module_enabled = true + cloud_provider = "aws" + + 
kubernetes_cluster_name = module.anyscale_eks_cluster.eks_cluster_name + + depends_on = [module.anyscale_eks_cluster] +} + +module "anyscale_k8s_configmap" { + source = "../../../modules/anyscale-k8s-configmap" + + module_enabled = true + cloud_provider = "aws" + + anyscale_kubernetes_namespace = module.anyscale_k8s_namespace.anyscale_kubernetes_namespace_name + + anyscale_instance_types = [ + { + instanceType = "4CPU-16GB", + CPU = 4, + memory = "16Gi" + }, + { + instanceType = "8CPU-32GB" + CPU = 8 + memory = "32Gi" + }, + { + instanceType = "4CPU-16GB-1xA10" + CPU = 4 + GPU = 1 + memory = "16Gi" + accelerator_type = { "A10G" = 1 } + }, + { + instanceType = "4CPU-16GB-1xT4" + CPU = 4 + GPU = 1 + memory = "16Gi" + accelerator_type = { "T4" = 1 } + } + ] + + depends_on = [module.anyscale_eks_cluster, module.anyscale_k8s_helm] +} diff --git a/examples/aws/eks-private/outputs.tf b/examples/aws/eks-private/outputs.tf new file mode 100644 index 0000000..784dfba --- /dev/null +++ b/examples/aws/eks-private/outputs.tf @@ -0,0 +1,28 @@ +locals { + kubernetes_zones = join(",", module.anyscale_vpc.availability_zones) +} + +output "eks_cluster_name" { + description = "The name of the EKS cluster." + value = module.anyscale_eks_cluster.eks_cluster_name +} + +output "anyscale_register_command" { + description = <<-EOF + Anyscale register command. + This output can be used with the Anyscale CLI to register a new Anyscale Cloud. + You will need to replace `` with a name of your choosing before running the Anyscale CLI command. 
+ EOF + value = <<-EOT + anyscale cloud register --provider aws \ + --name \ + --region ${var.aws_region} \ + --compute-stack k8s \ + --anyscale-iam-role-id ${module.anyscale_iam_roles.iam_anyscale_access_role_arn} \ + --s3-bucket-id ${module.anyscale_s3.s3_bucket_id} \ + --kubernetes-namespaces ${module.anyscale_k8s_namespace.anyscale_kubernetes_namespace_name} \ + --kubernetes-ingress-external-address ${module.anyscale_k8s_helm.nginx_ingress_lb_hostname[0]} \ + --kubernetes-zones ${local.kubernetes_zones} \ + --kubernetes-dataplane-identity ${module.anyscale_iam_roles.iam_anyscale_eks_node_role_arn} + EOT +} diff --git a/anyscale-k8s-helm/test/anyscale-aws-test/variables.tf b/examples/aws/eks-private/variables.tf similarity index 73% rename from anyscale-k8s-helm/test/anyscale-aws-test/variables.tf rename to examples/aws/eks-private/variables.tf index 67d3715..feab7f9 100644 --- a/anyscale-k8s-helm/test/anyscale-aws-test/variables.tf +++ b/examples/aws/eks-private/variables.tf @@ -53,45 +53,33 @@ variable "tags" { type = map(string) default = { "test" : true, - "environment" : "test" + "environment" : "example", + "repo" : "terraform-kubernetes-anyscale-foundation-modules", + "example" : "aws/eks-private" } } -variable "kubernetes_cluster_name" { +variable "anyscale_trusted_role_arns" { description = <<-EOT - (Optional) The name of the Kubernetes cluster. + (Optional) A list of ARNs of IAM roles that are trusted by the Anyscale IAM role. - ex: - ``` - kubernetes_cluster_name = "anyscale-cluster" - ``` + Including here to override for Anyscale Staging. EOT - type = string - default = null + type = list(string) + default = [] } -variable "kubernetes_endpoint_address" { +variable "anyscale_s3_cors_rule" { description = <<-EOT - (Optional) The address of the Kubernetes API server. + (Optional) A map of CORS rules for the S3 bucket. 
- ex: - ``` - kubernetes_endpoint_address = "https://anyscale-cluster.eks.us-east-2.amazonaws.com" - ``` + Including here to override for Anyscale Staging. EOT - type = string - default = null -} - -variable "kubernetes_cluster_ca_data" { - description = <<-EOT - (Optional) The base64 encoded certificate data required to communicate with the Kubernetes cluster. - - ex: - ``` - kubernetes_cluster_ca_data = "LS0txxxxx" - ``` - EOT - type = string - default = null + type = map(any) + default = { + allowed_headers = ["*"] + allowed_methods = ["GET", "POST", "PUT", "HEAD", "DELETE"] + allowed_origins = ["https://*.anyscale.com"] + expose_headers = [] + } } diff --git a/examples/aws/eks-private/versions.tf b/examples/aws/eks-private/versions.tf new file mode 100644 index 0000000..e937434 --- /dev/null +++ b/examples/aws/eks-private/versions.tf @@ -0,0 +1,49 @@ +terraform { + required_version = ">= 1.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + + helm = { + source = "hashicorp/helm" + version = "~> 2.0" + } + + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } + } +} + + +provider "helm" { + kubernetes { + host = module.anyscale_eks_cluster.eks_kubeconfig.endpoint + cluster_ca_certificate = base64decode(module.anyscale_eks_cluster.eks_kubeconfig.cluster_ca_certificate) + + # https://registry.terraform.io/providers/hashicorp/helm/latest/docs#exec-plugins + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", module.anyscale_eks_cluster.eks_cluster_name] + command = "aws" + } + } +} + +provider "kubernetes" { + host = module.anyscale_eks_cluster.eks_kubeconfig.endpoint + cluster_ca_certificate = base64decode(module.anyscale_eks_cluster.eks_kubeconfig.cluster_ca_certificate) + + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", module.anyscale_eks_cluster.eks_cluster_name] + command = "aws" + 
} +} + +provider "aws" { + region = var.aws_region +} diff --git a/examples/aws/eks-public/README.md b/examples/aws/eks-public/README.md new file mode 100644 index 0000000..79f5dfb --- /dev/null +++ b/examples/aws/eks-public/README.md @@ -0,0 +1,70 @@ +[![Build Status][badge-build]][build-status] +[![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases) +[![AWS Provider Version][badge-tf-aws]](https://github.com/terraform-providers/terraform-provider-aws/releases) + +# Anyscale AWS EKS Example - Public Networking +This example creates the resources to run Anyscale on AWS EKS with a public networking. + +## Known issues + +- Running `terraform destroy` can time out while deleting the ingress helm chart + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0 | +| [aws](#requirement\_aws) | ~> 5.0 | +| [helm](#requirement\_helm) | ~> 2.0 | +| [kubernetes](#requirement\_kubernetes) | ~> 2.0 | + +## Providers + +No providers. 
+ +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [anyscale\_efs](#module\_anyscale\_efs) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-efs | n/a | +| [anyscale\_eks\_cluster](#module\_anyscale\_eks\_cluster) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-cluster | n/a | +| [anyscale\_eks\_nodegroups](#module\_anyscale\_eks\_nodegroups) | ../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-nodegroups | n/a | +| [anyscale\_iam\_roles](#module\_anyscale\_iam\_roles) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-iam | n/a | +| [anyscale\_k8s\_configmap](#module\_anyscale\_k8s\_configmap) | ../../../modules/anyscale-k8s-configmap | n/a | +| [anyscale\_k8s\_helm](#module\_anyscale\_k8s\_helm) | ../../../modules/anyscale-k8s-helm | n/a | +| [anyscale\_k8s\_namespace](#module\_anyscale\_k8s\_namespace) | ../../../modules/anyscale-k8s-namespace | n/a | +| [anyscale\_s3](#module\_anyscale\_s3) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-s3 | n/a | +| [anyscale\_securitygroup](#module\_anyscale\_securitygroup) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-securitygroups | n/a | +| [anyscale\_vpc](#module\_anyscale\_vpc) | github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-vpc | n/a | + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [anyscale\_cloud\_id](#input\_anyscale\_cloud\_id) | (Optional) Anyscale Cloud ID. Default is `null`. | `string` | `null` | no | +| [anyscale\_deploy\_env](#input\_anyscale\_deploy\_env) | (Optional) Anyscale deploy environment. Used in resource names and tags. 
| `string` | `"production"` | no | +| [anyscale\_s3\_cors\_rule](#input\_anyscale\_s3\_cors\_rule) | (Optional) A map of CORS rules for the S3 bucket.

Included here so it can be overridden for Anyscale Staging. | `map(any)` |
{
"allowed_headers": [
"*"
],
"allowed_methods": [
"GET",
"POST",
"PUT",
"HEAD",
"DELETE"
],
"allowed_origins": [
"https://*.anyscale.com"
],
"expose_headers": []
}
| no | +| [anyscale\_trusted\_role\_arns](#input\_anyscale\_trusted\_role\_arns) | (Optional) A list of ARNs of IAM roles that are trusted by the Anyscale IAM role.

Included here so it can be overridden for Anyscale Staging. | `list(string)` | `[]` | no | +| [aws\_region](#input\_aws\_region) | The AWS region in which all resources will be created. | `string` | `"us-east-2"` | no | +| [tags](#input\_tags) | (Optional) A map of tags to apply to all resources that accept tags. | `map(string)` |
{
"environment": "example",
"example": "aws/eks-public",
"repo": "terraform-kubernetes-anyscale-foundation-modules",
"test": true
}
| no | + +## Outputs + +| Name | Description | +|------|-------------| +| [anyscale\_register\_command](#output\_anyscale\_register\_command) | Anyscale register command.
This output can be used with the Anyscale CLI to register a new Anyscale Cloud.
You will need to replace `` with a name of your choosing before running the Anyscale CLI command. | +| [eks\_cluster\_name](#output\_eks\_cluster\_name) | The name of the EKS cluster. | + + + +[Terraform]: https://www.terraform.io +[Issues]: https://github.com/anyscale/sa-sandbox-terraform/issues +[badge-build]: https://github.com/anyscale/sa-sandbox-terraform/workflows/CI/CD%20Pipeline/badge.svg +[badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform +[badge-tf-aws]: https://img.shields.io/badge/AWS-5.+-F8991D.svg?logo=terraform +[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions diff --git a/examples/aws/eks-public/main.tf b/examples/aws/eks-public/main.tf new file mode 100644 index 0000000..5e0142f --- /dev/null +++ b/examples/aws/eks-public/main.tf @@ -0,0 +1,456 @@ +# --------------------------------------------------------------------------------------------------------------------- +# Example Anyscale K8s Resources - Public Networking +# This template creates EKS resources for Anyscale +# It creates: +# - VPC +# - Security Group +# - S3 Bucket +# - IAM Roles +# - EKS Cluster +# - EKS Nodegroups +# - Helm Charts +# --------------------------------------------------------------------------------------------------------------------- +locals { + # azs = slice(data.aws_availability_zones.available.names, 0, 3) + + full_tags = merge(tomap({ + anyscale-cloud-id = var.anyscale_cloud_id, + anyscale-deploy-environment = var.anyscale_deploy_env + }), + var.tags + ) +} + +locals { + public_subnets = ["172.24.101.0/24", "172.24.102.0/24", "172.24.103.0/24"] + private_subnets = ["172.24.20.0/24", "172.24.21.0/24", "172.24.22.0/24"] +} +module "anyscale_vpc" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = 
"github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-vpc" + + anyscale_vpc_name = "anyscale-eks-public" + cidr_block = "172.24.0.0/16" + + public_subnets = local.public_subnets + private_subnets = local.private_subnets +} +locals { + # Because subnet ID may not be known at plan time, we cannot use it as a key + anyscale_subnet_count = length(local.private_subnets) +} + +module "anyscale_securitygroup" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-securitygroups" + + vpc_id = module.anyscale_vpc.vpc_id + + security_group_name_prefix = "anyscale-eks-public-" + + ingress_with_self = [ + { rule = "all-all" } + ] +} + +module "anyscale_s3" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-s3" + + module_enabled = true + + anyscale_bucket_name = "anyscale-eks-public-${var.aws_region}" + force_destroy = true + cors_rule = var.anyscale_s3_cors_rule + + tags = local.full_tags +} + + +module "anyscale_efs" { + #checkov:skip=CKV_TF_1: Example code should use the latest version of the module + #checkov:skip=CKV_TF_2: Example code should use the latest version of the module + source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-efs" + + module_enabled = true + + anyscale_efs_name = "anyscale-eks-public-efs" + mount_targets_subnet_count = local.anyscale_subnet_count + mount_targets_subnets = module.anyscale_vpc.private_subnet_ids + associated_security_group_ids = [module.anyscale_securitygroup.security_group_id] + + tags = local.full_tags +} + 
+#trivy:ignore:avd-aws-0342 trivy:ignore:avd-aws-0342
+module "anyscale_iam_roles" {
+  #checkov:skip=CKV_TF_1: Example code should use the latest version of the module
+  #checkov:skip=CKV_TF_2: Example code should use the latest version of the module
+  source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-iam"
+
+  module_enabled = true
+
+  create_anyscale_access_role          = true
+  anyscale_trusted_role_arns           = var.anyscale_trusted_role_arns
+  create_cluster_node_instance_profile = false
+
+  create_iam_s3_policy   = true
+  anyscale_s3_bucket_arn = module.anyscale_s3.s3_bucket_arn
+
+  create_anyscale_eks_cluster_role = true
+  anyscale_eks_cluster_role_name   = "anyscale-eks-public-cluster-role"
+
+  create_anyscale_eks_node_role = true
+  anyscale_eks_node_role_name   = "anyscale-eks-public-node-role"
+  anyscale_eks_cluster_name     = module.anyscale_eks_cluster.eks_cluster_name
+
+  create_eks_ebs_csi_driver_role = true
+  eks_ebs_csi_role_name          = "anyscale-eks-public-ebs-csi-role"
+  anyscale_eks_cluster_oidc_arn  = module.anyscale_eks_cluster.eks_cluster_oidc_provider_arn
+  anyscale_eks_cluster_oidc_url  = module.anyscale_eks_cluster.eks_cluster_oidc_provider_url
+
+  create_eks_efs_csi_driver_role = false
+  eks_efs_csi_role_name          = "anyscale-eks-public-efs-csi-role"
+  efs_file_system_arn            = module.anyscale_efs.efs_arn
+
+  tags = local.full_tags
+}
+
+locals {
+  coredns_config = jsonencode({
+    affinity = {
+      nodeAffinity = {
+        requiredDuringSchedulingIgnoredDuringExecution = {
+          nodeSelectorTerms = [
+            {
+              matchExpressions = [
+                {
+                  key      = "node-type"
+                  operator = "In"
+                  values   = ["management"]
+                }
+              ]
+            }
+          ]
+        }
+      }
+    },
+    nodeSelector = {
+      "node-type" = "management"
+    },
+    tolerations = [
+      {
+        key      = "CriticalAddonsOnly"
+        operator = "Exists"
+      },
+      {
+        effect = "NoSchedule"
+        key    = "node-role.kubernetes.io/control-plane"
+      }
+    ],
+    replicaCount = 2
+  })
+
+}
+
+module "anyscale_eks_cluster" {
+  #checkov:skip=CKV_TF_1: Example code should use the latest version of the module
+  #checkov:skip=CKV_TF_2: Example code should use the latest version of the module
+  source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-cluster"
+
+  module_enabled = true
+
+  anyscale_subnet_ids   = module.anyscale_vpc.public_subnet_ids
+  anyscale_subnet_count = local.anyscale_subnet_count
+  # anyscale_security_group_id = module.anyscale_securitygroup.security_group_id
+  eks_role_arn      = module.anyscale_iam_roles.iam_anyscale_eks_cluster_role_arn
+  anyscale_eks_name = "anyscale-eks-public"
+
+  enabled_cluster_log_types = ["api", "authenticator", "audit", "scheduler", "controllerManager"]
+
+  eks_addons = [
+    {
+      addon_name           = "coredns"
+      addon_version        = "v1.11.1-eksbuild.8"
+      configuration_values = local.coredns_config
+    },
+    # Add EBS volume support for EKS
+    {
+      addon_name               = "aws-ebs-csi-driver"
+      addon_version            = "v1.33.0-eksbuild.1"
+      service_account_role_arn = module.anyscale_iam_roles.iam_anyscale_eks_ebs_csi_driver_role_arn
+    }
+  ]
+  eks_addons_depends_on = module.anyscale_eks_nodegroups
+
+  tags = local.full_tags
+
+  depends_on = [module.anyscale_vpc, module.anyscale_securitygroup]
+}
+
+module "anyscale_eks_nodegroups" {
+  #checkov:skip=CKV_TF_1: Example code should use the latest version of the module
+  #checkov:skip=CKV_TF_2: Example code should use the latest version of the module
+  source = "github.com/anyscale/terraform-aws-anyscale-cloudfoundation-modules//modules/aws-anyscale-eks-nodegroups"
+
+  module_enabled = true
+
+  anyscale_security_group_id   = module.anyscale_securitygroup.security_group_id
+  kubernetes_security_group_id = module.anyscale_eks_cluster.cluster_managed_security_group_id
+  launch_template_name         = "anyscale-eks-public-launch-template"
+
+  create_eks_management_node_group = true # Used just to have pods that are available for management helm charts, not for Anyscale resources
+
+  eks_node_role_arn = module.anyscale_iam_roles.iam_anyscale_eks_node_role_arn
+  eks_cluster_name  = module.anyscale_eks_cluster.eks_cluster_name
+  subnet_ids        = module.anyscale_vpc.private_subnet_ids
+
+  tags = local.full_tags
+
+  eks_anyscale_node_groups = [
+    {
+      name = "anyscale-ondemand-cpu-8CPU-32GB"
+      instance_types = [
+        "m6a.2xlarge",
+        "m5a.2xlarge",
+        "m6i.2xlarge",
+        "m5.2xlarge"
+      ]
+      capacity_type = "ON_DEMAND"
+      ami_type      = "AL2_x86_64_GPU"
+      tags          = {}
+      scaling_config = {
+        desired_size = 1 # Setting to 1 to prime the autoscaler cache with the instance types and GPU availability
+        max_size     = 50
+        min_size     = 0
+      }
+      taints = [
+        {
+          key    = "node.anyscale.com/capacity-type",
+          value  = "ANY",
+          effect = "NO_SCHEDULE",
+        }
+      ]
+    },
+
+    {
+      name = "anyscale-ondemand-cpu-16CPU-64GB"
+      instance_types = [
+        "m6a.4xlarge",
+        "m5a.4xlarge",
+        "m6i.4xlarge",
+        "m5.4xlarge",
+      ]
+      capacity_type = "ON_DEMAND"
+      ami_type      = "AL2_x86_64_GPU"
+      tags          = {}
+      scaling_config = {
+        desired_size = 1 # Setting to 1 to prime the autoscaler cache with the instance types and GPU availability
+        max_size     = 50
+        min_size     = 0
+      }
+      taints = [
+        {
+          key    = "node.anyscale.com/capacity-type",
+          value  = "ANY",
+          effect = "NO_SCHEDULE",
+        }
+      ]
+    },
+
+    {
+      name = "anyscale-spot-cpu-16CPU-64GB"
+      instance_types = [
+        "m6a.4xlarge",
+        "m5a.4xlarge",
+        "m6i.4xlarge",
+        "m5.4xlarge",
+      ]
+      capacity_type = "SPOT"
+      ami_type      = "AL2_x86_64_GPU"
+      tags          = {}
+      scaling_config = {
+        desired_size = 0
+        max_size     = 50
+        min_size     = 0
+      }
+      taints = [
+        {
+          key    = "node.anyscale.com/capacity-type",
+          value  = "SPOT",
+          effect = "NO_SCHEDULE",
+        }
+      ]
+    },
+
+    {
+      name = "anyscale-ondemand-gpu-16CPU-64GB-1xT4"
+      instance_types = [
+        "g4dn.4xlarge"
+      ]
+      capacity_type = "ON_DEMAND"
+      ami_type      = "AL2_x86_64_GPU"
+      # Setting the following as labels so the Autoscaler knows where to look for GPU availability
+      labels = { "nvidia.com/gpu.product" = "Tesla-T4", "nvidia.com/gpu.count" = "1" }
+      tags   = {}
+      scaling_config = {
+        desired_size = 0
+        max_size     = 50
+        min_size     = 0
+      }
+      taints = [
+        {
+          key    = "nvidia.com/gpu",
+          value  = "present",
+          effect = "NO_SCHEDULE",
+        },
+        {
+          key    = "node.anyscale.com/capacity-type",
+          value  = "ANY",
+          effect = "NO_SCHEDULE",
+        },
+        {
+          key    = "node.anyscale.com/accelerator-type",
+          value  = "GPU",
+          effect = "NO_SCHEDULE",
+        }
+      ]
+    },
+    {
+      name = "anyscale-ondemand-gpu-16CPU-64GB-1xA10G"
+      instance_types = [
+        "g5.4xlarge"
+      ]
+      capacity_type = "ON_DEMAND"
+      ami_type      = "AL2_x86_64_GPU"
+      # Setting the following as labels so the Autoscaler knows where to look for GPU availability
+      labels = {
+        "nvidia.com/gpu.product" = "NVIDIA-A10G"
+        "nvidia.com/gpu.count"   = "1"
+      }
+      tags = {}
+      scaling_config = {
+        desired_size = 0
+        max_size     = 50
+        min_size     = 0
+      }
+      taints = [
+        {
+          key    = "nvidia.com/gpu",
+          value  = "present",
+          effect = "NO_SCHEDULE",
+        },
+        {
+          key    = "node.anyscale.com/capacity-type",
+          value  = "ANY",
+          effect = "NO_SCHEDULE",
+        },
+        {
+          key    = "node.anyscale.com/accelerator-type",
+          value  = "GPU",
+          effect = "NO_SCHEDULE",
+        }
+      ]
+    },
+
+    {
+      name = "anyscale-spot-gpu-16CPU-64GB-1xA10G"
+      instance_types = [
+        "g5.4xlarge"
+      ]
+      capacity_type = "SPOT"
+      ami_type      = "AL2_x86_64_GPU"
+      # Setting the following as labels so the Autoscaler knows where to look for GPU availability
+      labels = {
+        "nvidia.com/gpu.product" = "NVIDIA-A10G"
+        "nvidia.com/gpu.count"   = "1"
+      }
+      tags = {}
+      scaling_config = {
+        desired_size = 0
+        max_size     = 50
+        min_size     = 0
+      }
+      taints = [
+        {
+          key    = "nvidia.com/gpu",
+          value  = "present",
+          effect = "NO_SCHEDULE",
+        },
+        {
+          key    = "node.anyscale.com/capacity-type",
+          value  = "SPOT", # spot capacity, same taint value as the spot CPU node group
+          effect = "NO_SCHEDULE",
+        },
+        {
+          key    = "node.anyscale.com/accelerator-type",
+          value  = "GPU",
+          effect = "NO_SCHEDULE",
+        }
+      ]
+    }
+  ]
+}
+
+module "anyscale_k8s_helm" {
+  source = "../../../modules/anyscale-k8s-helm"
+
+  module_enabled = true
+  cloud_provider = "aws"
+
+  kubernetes_cluster_name = module.anyscale_eks_cluster.eks_cluster_name
+
+  depends_on = [module.anyscale_eks_nodegroups]
+}
+
+module "anyscale_k8s_namespace" {
+  source = "../../../modules/anyscale-k8s-namespace"
+
+  module_enabled = true
+  cloud_provider = "aws"
+
+  kubernetes_cluster_name = module.anyscale_eks_cluster.eks_cluster_name
+
+  depends_on = [module.anyscale_eks_cluster]
+}
+
+module "anyscale_k8s_configmap" {
+  source = "../../../modules/anyscale-k8s-configmap"
+
+  module_enabled = true
+  cloud_provider = "aws"
+
+  anyscale_kubernetes_namespace = module.anyscale_k8s_namespace.anyscale_kubernetes_namespace_name
+
+  anyscale_instance_types = [
+    {
+      instanceType = "4CPU-16GB",
+      CPU          = 4,
+      memory       = "16Gi"
+    },
+    {
+      instanceType = "8CPU-32GB"
+      CPU          = 8
+      memory       = "32Gi"
+    },
+    {
+      instanceType     = "4CPU-16GB-1xA10"
+      CPU              = 4
+      GPU              = 1
+      memory           = "16Gi"
+      accelerator_type = { "A10G" = 1 }
+    },
+    {
+      instanceType     = "4CPU-16GB-1xT4"
+      CPU              = 4
+      GPU              = 1
+      memory           = "16Gi"
+      accelerator_type = { "T4" = 1 }
+    }
+  ]
+
+  depends_on = [module.anyscale_eks_cluster, module.anyscale_k8s_helm]
+}
diff --git a/examples/aws/eks-public/outputs.tf b/examples/aws/eks-public/outputs.tf
new file mode 100644
index 0000000..64233e4
--- /dev/null
+++ b/examples/aws/eks-public/outputs.tf
@@ -0,0 +1,29 @@
+locals {
+  kubernetes_zones = join(",", module.anyscale_vpc.availability_zones)
+}
+
+output "eks_cluster_name" {
+  description = "The name of the EKS cluster."
+  value       = module.anyscale_eks_cluster.eks_cluster_name
+}
+
+output "anyscale_register_command" {
+  description = <<-EOF
+    Anyscale register command.
+    This output can be used with the Anyscale CLI to register a new Anyscale Cloud.
+    You will need to replace `<CUSTOMER_DEFINED_NAME>` with a name of your choosing before running the Anyscale CLI command.
+  EOF
+  value       = <<-EOT
+    anyscale cloud register --provider aws \
+    --name <CUSTOMER_DEFINED_NAME> \
+    --region ${var.aws_region} \
+    --compute-stack k8s \
+    --anyscale-iam-role-id ${module.anyscale_iam_roles.iam_anyscale_access_role_arn} \
+    --s3-bucket-id ${module.anyscale_s3.s3_bucket_id} \
+    --efs-id ${module.anyscale_efs.efs_id} \
+    --kubernetes-namespaces ${module.anyscale_k8s_namespace.anyscale_kubernetes_namespace_name} \
+    --kubernetes-ingress-external-address ${module.anyscale_k8s_helm.nginx_ingress_lb_hostname[0]} \
+    --kubernetes-zones ${local.kubernetes_zones} \
+    --kubernetes-dataplane-identity ${module.anyscale_iam_roles.iam_anyscale_eks_node_role_arn}
+  EOT
+}
diff --git a/examples/aws/eks-public/variables.tf b/examples/aws/eks-public/variables.tf
new file mode 100644
index 0000000..a16a9c0
--- /dev/null
+++ b/examples/aws/eks-public/variables.tf
@@ -0,0 +1,85 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# ENVIRONMENT VARIABLES
+# Define these secrets as environment variables
+# ---------------------------------------------------------------------------------------------------------------------
+
+# AWS_ACCESS_KEY_ID
+# AWS_SECRET_ACCESS_KEY
+
+# ---------------------------------------------------------------------------------------------------------------------
+# REQUIRED VARIABLES
+# These variables must be set when using this module.
+# ---------------------------------------------------------------------------------------------------------------------
+
+variable "aws_region" {
+  description = "The AWS region in which all resources will be created."
+  type        = string
+  default     = "us-east-2"
+}
+
+variable "anyscale_cloud_id" {
+  description = "(Optional) Anyscale Cloud ID. Default is `null`."
+  type        = string
+  default     = null
+  validation {
+    condition = (
+      var.anyscale_cloud_id == null ? true : (
+        length(var.anyscale_cloud_id) > 4 &&
+        substr(var.anyscale_cloud_id, 0, 4) == "cld_"
+      )
+    )
+    error_message = "The anyscale_cloud_id value must start with \"cld_\"."
+  }
+}
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+variable "anyscale_deploy_env" {
+  description = "(Optional) Anyscale deploy environment. Used in resource names and tags."
+  type        = string
+  default     = "production"
+  validation {
+    condition = (
+      var.anyscale_deploy_env == "production" || var.anyscale_deploy_env == "development" || var.anyscale_deploy_env == "test"
+    )
+    error_message = "The anyscale_deploy_env only allows `production`, `test`, or `development`"
+  }
+}
+
+variable "tags" {
+  description = "(Optional) A map of tags to all resources that accept tags."
+  type        = map(string)
+  default = {
+    "test" : true,
+    "environment" : "example",
+    "repo" : "terraform-kubernetes-anyscale-foundation-modules",
+    "example" : "aws/eks-public"
+  }
+}
+
+variable "anyscale_trusted_role_arns" {
+  description = <<-EOT
+    (Optional) A list of ARNs of IAM roles that are trusted by the Anyscale IAM role.
+
+    Including here to override for Anyscale Staging.
+  EOT
+  type        = list(string)
+  default     = []
+}
+
+variable "anyscale_s3_cors_rule" {
+  description = <<-EOT
+    (Optional) A map of CORS rules for the S3 bucket.
+
+    Including here to override for Anyscale Staging.
+  EOT
+  type        = map(any)
+  default = {
+    allowed_headers = ["*"]
+    allowed_methods = ["GET", "POST", "PUT", "HEAD", "DELETE"]
+    allowed_origins = ["https://*.anyscale.com"]
+    expose_headers  = []
+  }
+}
diff --git a/examples/aws/eks-public/versions.tf b/examples/aws/eks-public/versions.tf
new file mode 100644
index 0000000..e937434
--- /dev/null
+++ b/examples/aws/eks-public/versions.tf
@@ -0,0 +1,49 @@
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+
+    helm = {
+      source  = "hashicorp/helm"
+      version = "~> 2.0"
+    }
+
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
+
+
+provider "helm" {
+  kubernetes {
+    host                   = module.anyscale_eks_cluster.eks_kubeconfig.endpoint
+    cluster_ca_certificate = base64decode(module.anyscale_eks_cluster.eks_kubeconfig.cluster_ca_certificate)
+
+    # https://registry.terraform.io/providers/hashicorp/helm/latest/docs#exec-plugins
+    exec {
+      api_version = "client.authentication.k8s.io/v1beta1"
+      args        = ["eks", "get-token", "--cluster-name", module.anyscale_eks_cluster.eks_cluster_name]
+      command     = "aws"
+    }
+  }
+}
+
+provider "kubernetes" {
+  host                   = module.anyscale_eks_cluster.eks_kubeconfig.endpoint
+  cluster_ca_certificate = base64decode(module.anyscale_eks_cluster.eks_kubeconfig.cluster_ca_certificate)
+
+  exec {
+    api_version = "client.authentication.k8s.io/v1beta1"
+    args        = ["eks", "get-token", "--cluster-name", module.anyscale_eks_cluster.eks_cluster_name]
+    command     = "aws"
+  }
+}
+
+provider "aws" {
+  region = var.aws_region
+}
diff --git a/main.tf b/main.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/anyscale-k8s-configmap/README.md b/modules/anyscale-k8s-configmap/README.md
new file mode 100644
index 0000000..6fab950
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/README.md
@@ -0,0 +1,63 @@
+[![Build Status][badge-build]][build-status]
+[![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases)
+[![OpenTofu Version][badge-opentofu]](https://github.com/opentofu/opentofu/releases)
+[![Kubernetes Provider Version][badge-tf-kubernetes]](https://github.com/terraform-providers/terraform-provider-kubernetes/releases)
+[![AWS Provider Version][badge-tf-aws]](https://github.com/terraform-providers/terraform-provider-aws/releases)
+[![Google Provider Version][badge-tf-google]](https://github.com/terraform-providers/terraform-provider-google/releases)
+
+# anyscale-k8s-configmap
+This module creates Kubernetes Configmaps for Anyscale applications and workloads.
+
+The `instance-types` ConfigMap defines the instance types that you wish to run on Anyscale. This ConfigMap can also be created
+via the Anyscale Helm Chart.
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.0 |
+| [kubernetes](#requirement\_kubernetes) | ~> 2.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [kubernetes](#provider\_kubernetes) | 2.32.0 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [kubernetes_config_map.instance_type](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [anyscale\_kubernetes\_namespace](#input\_anyscale\_kubernetes\_namespace) | (Required) The namespace to install the Anyscale resources.

ex:
anyscale_kubernetes_namespace = "anyscale-k8s"
| `string` | n/a | yes | +| [cloud\_provider](#input\_cloud\_provider) | (Required) The cloud provider (aws or gcp)

ex:
cloud_provider = "aws"
| `string` | n/a | yes | +| [anyscale\_instance\_types](#input\_anyscale\_instance\_types) | (Optional) A list of instance types to create in the instance-types configmap.

ex:
anyscale_instance_types = [
{
instanceType = "8CPU-32GB"
CPU = 8
memory = "32Gi" # 32gb
},
{
instanceType = "4CPU-16GB-1xA10"
CPU = 4
GPU = 1
memory = 17179869184 # 16gb converted to bytes
accelerator_type = {"A10G" = 1}
},
{
instanceType = "8CPU-32GB-1xA10"
CPU = 8
GPU = 1
memory = "32Gi" # 32gb
accelerator_type = {"A10G" = 1}
},
{
instanceType = "8CPU-32GB-1xT4"
CPU = 8
GPU = 1
memory = "32Gi" # 32gb
accelerator_type = {"T4" = 1}
}
]
|
list(object({
instanceType = string
CPU = number
GPU = optional(number)
memory = string
accelerator_type = optional(map(number)) # accelerator_type should be a map of key-value pairs
}))
|
[
{
"CPU": 8,
"instanceType": "8CPU-32GB",
"memory": "32Gi"
}
]
| no | +| [anyscale\_instance\_types\_version](#input\_anyscale\_instance\_types\_version) | (Optional) The version of the instance-types configmap.

ex:
anyscale_instance_types_version = "v1"
| `string` | `"v1"` | no | +| [create\_anyscale\_instance\_types\_map](#input\_create\_anyscale\_instance\_types\_map) | (Optional) Determines if the instance-types configmap should be created.

ex:
create_anyscale_instance_types_map = true
| `bool` | `true` | no | +| [module\_enabled](#input\_module\_enabled) | (Optional) Determines if this module should create resources.

If set to false, this module does not create the instance-types ConfigMap.
ex:
module_enabled = true
| `bool` | `false` | no |
+## Outputs
+
+No outputs.
+
+
+
+[Terraform]: https://www.terraform.io
+[Issues]: https://github.com/anyscale/sa-sandbox-terraform/issues
+[badge-build]: https://github.com/anyscale/sa-sandbox-terraform/workflows/CI/CD%20Pipeline/badge.svg
+[badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform
+[badge-tf-aws]: https://img.shields.io/badge/AWS-5.+-F8991D.svg?logo=terraform
+[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions
+[badge-opentofu]: https://img.shields.io/badge/opentofu-1.x%20-623CE4.svg?logo=terraform
+[badge-tf-google]: https://img.shields.io/badge/Google-5.+-F8991D.svg?logo=terraform
+[badge-tf-kubernetes]: https://img.shields.io/badge/KUBERNETES-2.+-F8991D.svg?logo=terraform
diff --git a/modules/anyscale-k8s-configmap/main.tf b/modules/anyscale-k8s-configmap/main.tf
new file mode 100644
index 0000000..69ffb75
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/main.tf
@@ -0,0 +1,33 @@
+locals {
+  module_enabled = var.module_enabled
+
+  aws_enabled = local.module_enabled && var.cloud_provider == "aws"
+  gcp_enabled = local.module_enabled && var.cloud_provider == "gcp"
+
+  create_anyscale_instance_types = local.module_enabled && var.create_anyscale_instance_types_map
+
+}
+
+resource "kubernetes_config_map" "instance_type" {
+  count = local.create_anyscale_instance_types ? 1 : 0 # module_enabled && create_anyscale_instance_types_map
+  metadata {
+    name      = "instance-types"
+    namespace = var.anyscale_kubernetes_namespace
+  }
+
+  data = {
+    version = var.anyscale_instance_types_version
+    "instance_types.yaml" = yamlencode({
+      for instance in var.anyscale_instance_types : instance.instanceType => {
+        resources = merge(
+          {
+            CPU    = instance.CPU
+            memory = instance.memory
+          },
+          instance.GPU != null ? { GPU = instance.GPU } : {},
+          instance.accelerator_type != null ? { for key, value in instance.accelerator_type : "accelerator_type:${key}" => value } : {}
+        )
+      }
+    })
+  }
+}
diff --git a/modules/anyscale-k8s-configmap/outputs.tf b/modules/anyscale-k8s-configmap/outputs.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-aws-test/main.tf b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/main.tf
new file mode 100644
index 0000000..ad4a79a
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/main.tf
@@ -0,0 +1,197 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# CREATE Anyscale K8s ConfigMap Resources
+# This template creates EKS resources for Anyscale
+# Requires:
+# - VPC
+# - Security Group
+# - IAM Roles
+# - EKS Cluster
+# ---------------------------------------------------------------------------------------------------------------------
+locals {
+  # azs = slice(data.aws_availability_zones.available.names, 0, 3)
+
+  full_tags = merge(tomap({
+    anyscale-cloud-id           = var.anyscale_cloud_id,
+    anyscale-deploy-environment = var.anyscale_deploy_env
+    }),
+    var.tags
+  )
+}
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Create resources for EKS TF Module
+# Creates a VPC
+# Creates a Security Group
+# Creates IAM Roles
+# ---------------------------------------------------------------------------------------------------------------------
+locals {
+  public_subnets  = ["172.24.101.0/24", "172.24.102.0/24", "172.24.103.0/24"]
+  private_subnets = ["172.24.20.0/24", "172.24.21.0/24", "172.24.22.0/24"]
+}
+module "eks_vpc" {
+  #checkov:skip=CKV_TF_1: Test code should use the latest version of the module
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-vpc"
+
+  anyscale_vpc_name = "tftest-k8s-configmap"
+  cidr_block        = "172.24.0.0/16"
+
+  public_subnets  = local.public_subnets
+  private_subnets = local.private_subnets
+}
+locals {
+  # Because subnet ID may not be known at plan time, we cannot use it as a key
+  anyscale_subnet_count = length(local.private_subnets)
+}
+
+module "eks_securitygroup" {
+  #checkov:skip=CKV_TF_1: Test code should use the latest version of the module
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-securitygroups"
+
+  vpc_id = module.eks_vpc.vpc_id
+
+  security_group_name_prefix = "tftest-k8s-configmap-"
+
+  ingress_with_self = [
+    { rule = "all-all" }
+  ]
+}
+
+module "eks_iam_roles" {
+  #checkov:skip=CKV_TF_1: Test code should use the latest version of the module
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-iam"
+
+  module_enabled                       = true
+  create_anyscale_access_role          = true
+  anyscale_access_role_name            = "tftest-k8s-configmap-controlplane-role"
+  create_cluster_node_instance_profile = false
+  create_iam_s3_policy                 = false
+
+  create_anyscale_eks_cluster_role = true
+  anyscale_eks_cluster_role_name   = "tftest-k8s-configmap-cluster-role"
+  create_anyscale_eks_node_role    = true
+  anyscale_eks_node_role_name      = "tftest-k8s-configmap-node-role"
+
+  tags = local.full_tags
+}
+
+locals {
+  coredns_config = jsonencode({
+    affinity = {
+      nodeAffinity = {
+        requiredDuringSchedulingIgnoredDuringExecution = {
+          nodeSelectorTerms = [
+            {
+              matchExpressions = [
+                {
+                  key      = "node-type"
+                  operator = "In"
+                  values   = ["management"]
+                }
+              ]
+            }
+          ]
+        }
+      }
+    },
+    nodeSelector = {
+      "node-type" = "management"
+    },
+    tolerations = [
+      {
+        key      = "CriticalAddonsOnly"
+        operator = "Exists"
+      },
+      {
+        effect = "NoSchedule"
+        key    = "node-role.kubernetes.io/control-plane"
+      }
+    ],
+    replicaCount = 2
+  })
+}
+
+module "eks_cluster" {
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-cluster"
+
+  module_enabled = true
+
+  anyscale_subnet_ids        = module.eks_vpc.public_subnet_ids
+  anyscale_subnet_count      = local.anyscale_subnet_count
+  anyscale_security_group_id = module.eks_securitygroup.security_group_id
+  eks_role_arn               = module.eks_iam_roles.iam_anyscale_eks_cluster_role_arn
+  anyscale_eks_name          = "tftest-k8s-configmap"
+
+  tags = local.full_tags
+
+  depends_on = [module.eks_iam_roles, module.eks_vpc, module.eks_securitygroup]
+}
+
+module "k8s_default_namespace" {
+  source = "../../../anyscale-k8s-namespace"
+
+  module_enabled = true
+  cloud_provider = "aws"
+}
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Create Resources with no optional parameters
+# ---------------------------------------------------------------------------------------------------------------------
+module "all_defaults" {
+  source = "../../"
+
+  module_enabled = true
+  cloud_provider = "aws"
+
+  anyscale_kubernetes_namespace = module.k8s_default_namespace.anyscale_kubernetes_namespace_name
+}
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Create Resources with as many optional parameters as possible
+# ---------------------------------------------------------------------------------------------------------------------
+module "k8s_kitchensink_namespace" {
+  source = "../../../anyscale-k8s-namespace"
+
+  module_enabled = true
+  cloud_provider = "aws"
+
+  anyscale_kubernetes_namespace = "kitchensink"
+}
+
+module "kitchen_sink" {
+  source = "../../"
+
+  module_enabled = true
+  cloud_provider = "aws"
+
+  anyscale_kubernetes_namespace = module.k8s_kitchensink_namespace.anyscale_kubernetes_namespace_name
+
+  create_anyscale_instance_types_map = true
+  anyscale_instance_types_version    = "v1"
+  anyscale_instance_types = [
+    {
+      instanceType = "t3.small"
+      CPU          = 2
+      memory       = "4Gi"
+    },
+    {
+      instanceType     = "4CPU-16GB-1xA10"
+      CPU              = 4
+      GPU              = 1
+      memory           = "16Gi" # 16 GiB, given as a Kubernetes quantity string
+      accelerator_type = { "A10G" = 1 }
+    },
+  ]
+}
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Do not create any resources
+# ---------------------------------------------------------------------------------------------------------------------
+module "test_no_resources" {
+  source = "../../"
+
+  module_enabled = false
+
+  #Required variables
+  cloud_provider                = "aws"
+  anyscale_kubernetes_namespace = module.k8s_default_namespace.anyscale_kubernetes_namespace_name
+}
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-aws-test/outputs.tf b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/outputs.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-aws-test/variables.tf b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/variables.tf
new file mode 100644
index 0000000..9393991
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/variables.tf
@@ -0,0 +1,58 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# ENVIRONMENT VARIABLES
+# Define these secrets as environment variables
+# ---------------------------------------------------------------------------------------------------------------------
+
+# AWS_ACCESS_KEY_ID
+# AWS_SECRET_ACCESS_KEY
+
+# ---------------------------------------------------------------------------------------------------------------------
+# REQUIRED VARIABLES
+# These variables must be set when using this module.
+# ---------------------------------------------------------------------------------------------------------------------
+
+variable "aws_region" {
+  description = "The AWS region in which all resources will be created."
+  type        = string
+  default     = "us-east-2"
+}
+
+variable "anyscale_cloud_id" {
+  description = "(Optional) Anyscale Cloud ID. Default is `null`."
+  type        = string
+  default     = null
+  validation {
+    condition = (
+      var.anyscale_cloud_id == null ? true : (
+        length(var.anyscale_cloud_id) > 4 &&
+        substr(var.anyscale_cloud_id, 0, 4) == "cld_"
+      )
+    )
+    error_message = "The anyscale_cloud_id value must start with \"cld_\"."
+  }
+}
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+variable "anyscale_deploy_env" {
+  description = "(Optional) Anyscale deploy environment. Used in resource names and tags."
+  type        = string
+  default     = "production"
+  validation {
+    condition = (
+      var.anyscale_deploy_env == "production" || var.anyscale_deploy_env == "development" || var.anyscale_deploy_env == "test"
+    )
+    error_message = "The anyscale_deploy_env only allows `production`, `test`, or `development`"
+  }
+}
+
+variable "tags" {
+  description = "(Optional) A map of tags to all resources that accept tags."
+  type        = map(string)
+  default = {
+    "test" : true,
+    "environment" : "test"
+  }
+}
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-aws-test/versions.tf b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/versions.tf
new file mode 100644
index 0000000..c46128a
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/test/anyscale-aws-test/versions.tf
@@ -0,0 +1,31 @@
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
+
+
+
+provider "kubernetes" {
+  host                   = module.eks_cluster.eks_kubeconfig.endpoint
+  cluster_ca_certificate = base64decode(module.eks_cluster.eks_kubeconfig.cluster_ca_certificate)
+
+  exec {
+    api_version = "client.authentication.k8s.io/v1beta1"
+    args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.eks_cluster_name]
+    command     = "aws"
+  }
+}
+
+provider "aws" {
+  region = var.aws_region
+}
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/main.tf b/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/main.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/outputs.tf b/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/outputs.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/variables.tf b/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/variables.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/versions.tf b/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/versions.tf
new file mode 100644
index 0000000..fb398ef
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/test/anyscale-gcp-test/versions.tf
@@ -0,0 +1,23 @@
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
+
+
+# NOTE(review): copied from the AWS test; module.eks_cluster is not declared in this (still empty) GCP test.
+provider "kubernetes" {
+  host                   = module.eks_cluster.eks_kubeconfig.endpoint
+  cluster_ca_certificate = base64decode(module.eks_cluster.eks_kubeconfig.cluster_ca_certificate)
+
+  exec {
+    api_version = "client.authentication.k8s.io/v1beta1"
+    args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.eks_cluster_name]
+    command     = "aws"
+  }
+}
diff --git a/modules/anyscale-k8s-configmap/variables.tf b/modules/anyscale-k8s-configmap/variables.tf
new file mode 100644
index 0000000..2ed565b
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/variables.tf
@@ -0,0 +1,133 @@
+# ------------------------------------------------------------------------------
+# REQUIRED PARAMETERS
+# These variables must be set when using this module.
+# ------------------------------------------------------------------------------
+variable "cloud_provider" {
+  description = <<-EOT
+    (Required) The cloud provider (aws or gcp)
+
+    ex:
+    ```
+    cloud_provider = "aws"
+    ```
+  EOT
+  type        = string
+  validation {
+    condition = (
+      var.cloud_provider == "aws" || var.cloud_provider == "gcp"
+    )
+    error_message = "The cloud_provider only allows `aws` or `gcp`"
+  }
+}
+
+variable "anyscale_kubernetes_namespace" {
+  description = <<-EOT
+    (Required) The namespace to install the Anyscale resources.
+
+    ex:
+    ```
+    anyscale_kubernetes_namespace = "anyscale-k8s"
+    ```
+  EOT
+  type        = string
+}
+
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+variable "module_enabled" {
+  description = <<-EOT
+    (Optional) Determines if this module should create resources.
+
+    If set to false, this module does not create the instance-types ConfigMap.
+    ex:
+    ```
+    module_enabled = true
+    ```
+  EOT
+  type        = bool
+  default     = false
+}
+
+# ------------------
+# Instance Types
+# ------------------
+variable "create_anyscale_instance_types_map" {
+  description = <<-EOT
+    (Optional) Determines if the instance-types configmap should be created.
+
+    ex:
+    ```
+    create_anyscale_instance_types_map = true
+    ```
+  EOT
+  type        = bool
+  default     = true
+}
+
+variable "anyscale_instance_types_version" {
+  description = <<-EOT
+    (Optional) The version of the instance-types configmap.
+
+    ex:
+    ```
+    anyscale_instance_types_version = "v1"
+    ```
+  EOT
+  type        = string
+  default     = "v1"
+}
+
+variable "anyscale_instance_types" {
+  description = <<-EOT
+    (Optional) A list of instance types to create in the instance-types configmap.
+
+    ex:
+    ```
+    anyscale_instance_types = [
+      {
+        instanceType = "8CPU-32GB"
+        CPU = 8
+        memory = "32Gi" # 32gb
+      },
+      {
+        instanceType = "4CPU-16GB-1xA10"
+        CPU = 4
+        GPU = 1
+        memory = 17179869184 # 16gb converted to bytes
+        accelerator_type = {"A10G" = 1}
+      },
+      {
+        instanceType = "8CPU-32GB-1xA10"
+        CPU = 8
+        GPU = 1
+        memory = "32Gi" # 32gb
+        accelerator_type = {"A10G" = 1}
+      },
+      {
+        instanceType = "8CPU-32GB-1xT4"
+        CPU = 8
+        GPU = 1
+        memory = "32Gi" # 32gb
+        accelerator_type = {"T4" = 1}
+      }
+    ]
+    ```
+  EOT
+  type = list(object({
+    instanceType     = string
+    CPU              = number
+    GPU              = optional(number)
+    memory           = string
+    accelerator_type = optional(map(number)) # accelerator_type should be a map of key-value pairs
+  }))
+  default = [
+    {
+      instanceType = "8CPU-32GB"
+      CPU          = 8
+      memory       = "32Gi"
+    }
+  ]
+}
diff --git a/modules/anyscale-k8s-configmap/versions.tf b/modules/anyscale-k8s-configmap/versions.tf
new file mode 100644
index 0000000..94019cd
--- /dev/null
+++ b/modules/anyscale-k8s-configmap/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
diff --git a/modules/anyscale-k8s-helm/README.md b/modules/anyscale-k8s-helm/README.md
new file mode 100644
index 0000000..38f33a3
--- /dev/null
+++ b/modules/anyscale-k8s-helm/README.md
@@ -0,0 +1,83 @@
+[![Build Status][badge-build]][build-status]
+[![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases)
+[![OpenTofu Version][badge-opentofu]](https://github.com/opentofu/opentofu/releases)
+[![Kubernetes Provider Version][badge-tf-kubernetes]](https://github.com/terraform-providers/terraform-provider-kubernetes/releases)
+[![AWS Provider Version][badge-tf-aws]](https://github.com/terraform-providers/terraform-provider-aws/releases)
+[![Google Provider Version][badge-tf-google]](https://github.com/terraform-providers/terraform-provider-google/releases)
+
+# anyscale-k8s-helm
+This module creates Kubernetes helm charts for Anyscale applications and workloads.
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.0 |
+| [helm](#requirement\_helm) | ~> 2.0 |
+| [kubernetes](#requirement\_kubernetes) | ~> 2.0 |
+| [time](#requirement\_time) | >= 0.12 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [aws](#provider\_aws) | 5.63.0 |
+| [helm](#provider\_helm) | 2.15.0 |
+| [kubernetes](#provider\_kubernetes) | 2.32.0 |
+| [time](#provider\_time) | 0.12.0 |
+
+## Modules
+
+No modules.
+ +## Resources + +| Name | Type | +|------|------| +| [helm_release.anyscale_cluster_autoscaler](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [helm_release.feature_metrics_server](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [helm_release.nginx_ingress](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [helm_release.nvidia](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [kubernetes_namespace.ingress_nginx](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | +| [time_sleep.wait_helm_termination](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | +| [kubernetes_service.nginx_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/service) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [cloud\_provider](#input\_cloud\_provider) | (Required) The cloud provider (aws or gcp)

ex:
cloud_provider = "aws"
| `string` | n/a | yes | +| [anyscale\_cluster\_autoscaler\_chart](#input\_anyscale\_cluster\_autoscaler\_chart) | (Optional) The Helm chart to install the Cluster Autoscaler.

ex:
anyscale_cluster_autoscaler_chart = {
enabled = true
name = "cluster-autoscaler"
repository = "https://kubernetes.github.io/autoscaler"
chart = "cluster-autoscaler"
chart_version = "9.37.0"
namespace = "kube-system"
values = {
"some.other.config" = "value"
}
}
|
object({
enabled = bool
name = optional(string)
repository = optional(string)
chart = optional(string)
chart_version = optional(string)
namespace = optional(string)
values = optional(map(string))
})
|
{
"chart": "cluster-autoscaler",
"chart_version": "9.37.0",
"enabled": true,
"name": "cluster-autoscaler",
"namespace": "kube-system",
"repository": "https://kubernetes.github.io/autoscaler",
"values": {}
}
| no | +| [anyscale\_ingress\_aws\_nlb\_internal](#input\_anyscale\_ingress\_aws\_nlb\_internal) | (Optional) Determines if the AWS NLB should be internal.

Requires `cloud_provider` to be set to `aws`.
Requires `anyscale_ingress_chart` to be enabled.

ex:
anyscale_ingress_aws_nlb_internal = true
| `bool` | `false` | no | +| [anyscale\_ingress\_chart](#input\_anyscale\_ingress\_chart) | (Optional) The Helm chart to install the Cluster Ingress.

ex:
anyscale_ingress_chart = {
enabled = true
name = "anyscale-ingress"
repository = "https://kubernetes.github.io/ingress-nginx"
chart = "ingress-nginx"
chart_version = "4.11.1"
namespace = "ingress-nginx"
values = {
"some.other.config" = "value"
}
}
|
object({
enabled = bool
name = optional(string)
repository = optional(string)
chart = optional(string)
chart_version = optional(string)
namespace = optional(string)
values = optional(map(string))
})
|
{
"chart": "ingress-nginx",
"chart_version": "4.11.1",
"enabled": true,
"name": "anyscale-ingress",
"namespace": "ingress-nginx",
"repository": "https://kubernetes.github.io/ingress-nginx",
"values": {
"controller.allowSnippetAnnotations": "true",
"controller.autoscaling.enabled": "true",
"controller.service.type": "LoadBalancer"
}
}
| no | +| [anyscale\_metrics\_server\_chart](#input\_anyscale\_metrics\_server\_chart) | (Optional) The Helm chart to install the Metrics Server.

Required for the Anyscale Autoscaler to function.

ex:
anyscale_metrics_server_chart = {
enabled = true
name = "metrics-server"
repository = "https://kubernetes-sigs.github.io/metrics-server/"
chart = "metrics-server"
chart_version = "3.12.1"
namespace = "metrics-server"
values = {
"some.other.config" = "value"
}
}
|
object({
enabled = bool
name = optional(string)
repository = optional(string)
chart = optional(string)
chart_version = optional(string)
namespace = optional(string)
values = optional(map(string))
})
|
{
"chart": "metrics-server",
"chart_version": "3.12.1",
"enabled": true,
"name": "metrics-server",
"namespace": "metrics-server",
"repository": "https://kubernetes-sigs.github.io/metrics-server/",
"values": {}
}
| no | +| [anyscale\_nvidia\_device\_plugin\_chart](#input\_anyscale\_nvidia\_device\_plugin\_chart) | (Optional) The Helm chart to install the NVIDIA Device Plugin.

Valid settings can be found in the [nvidia documentation](https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#deploying-with-gpu-feature-discovery-for-automatic-node-labels)

ex:
anyscale_nvidia_device_plugin_chart = {
enabled = true
name = "nvidia-device-plugin"
repository = "https://nvidia.github.io/k8s-device-plugin"
chart = "nvidia-device-plugin"
chart_version = "0.16.2"
namespace = "nvidia-device-plugin"
values = {
"some.other.config" = "value"
}
}
|
object({
enabled = bool
name = optional(string)
repository = optional(string)
chart = optional(string)
chart_version = optional(string)
namespace = optional(string)
values = optional(map(string))
})
|
{
"chart": "nvidia-device-plugin",
"chart_version": "0.16.2",
"enabled": true,
"name": "anyscale-nvidia-device-plugin",
"namespace": "nvidia-device-plugin",
"repository": "https://nvidia.github.io/k8s-device-plugin",
"values": {
"gfd.enabled": "true",
"nfd.worker.tolerations[0].effect": "NoSchedule",
"nfd.worker.tolerations[0].key": "node-role.kubernetes.io/master",
"nfd.worker.tolerations[0].operator": "Equal",
"nfd.worker.tolerations[0].value": "",
"nfd.worker.tolerations[1].effect": "NoSchedule",
"nfd.worker.tolerations[1].key": "nvidia.com/gpu",
"nfd.worker.tolerations[1].operator": "Equal",
"nfd.worker.tolerations[1].value": "present",
"nfd.worker.tolerations[2].effect": "NoSchedule",
"nfd.worker.tolerations[2].key": "node.anyscale.com/accelerator-type",
"nfd.worker.tolerations[2].operator": "Equal",
"nfd.worker.tolerations[2].value": "GPU",
"nfd.worker.tolerations[3].effect": "NoSchedule",
"nfd.worker.tolerations[3].key": "node.anyscale.com/capacity-type",
"nfd.worker.tolerations[3].operator": "Equal",
"nfd.worker.tolerations[3].value": "ANY",
"priorityClassName": "system-node-critical",
"tolerations[0].effect": "NoSchedule",
"tolerations[0].key": "nvidia.com/gpu",
"tolerations[0].operator": "Equal",
"tolerations[0].value": "present",
"tolerations[1].effect": "NoSchedule",
"tolerations[1].key": "node.anyscale.com/accelerator-type",
"tolerations[1].operator": "Equal",
"tolerations[1].value": "GPU",
"tolerations[2].effect": "NoSchedule",
"tolerations[2].key": "node.anyscale.com/capacity-type",
"tolerations[2].operator": "Equal",
"tolerations[2].value": "ANY"
}
}
| no | +| [anyscale\_prometheus\_chart](#input\_anyscale\_prometheus\_chart) | (Optional) The Helm chart to install Prometheus.

ex:
anyscale_prometheus_chart = {
enabled = true
name = "prometheus"
repository = "https://prometheus-community.github.io/helm-charts"
chart = "prometheus"
chart_version = "16.0.0"
namespace = "prometheus"
values = {
"some.other.config" = "value"
}
}
|
object({
enabled = bool
name = optional(string)
repository = optional(string)
chart = optional(string)
chart_version = optional(string)
namespace = optional(string)
values = optional(map(string))
})
|
{
"chart": "prometheus",
"chart_version": "25.26.0",
"enabled": false,
"name": "prometheus",
"namespace": "prometheus",
"repository": "https://prometheus-community.github.io/helm-charts",
"values": {}
}
| no | +| [kubernetes\_cluster\_name](#input\_kubernetes\_cluster\_name) | (Optional) The name of the Kubernetes cluster.

ex:
kubernetes_cluster_name = "my-cluster"
| `string` | `null` | no | +| [module\_enabled](#input\_module\_enabled) | (Optional) Determines if this module should create resources.

If set to true, `cloud_provider` must be provided. When set to false, no resources are created, regardless of each chart's `enabled` setting.
ex:
module_enabled = true
| `bool` | `false` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [helm\_autoscaler\_status](#output\_helm\_autoscaler\_status) | Status of the Cluster Autoscaler Helm release | +| [helm\_nginx\_ingress\_status](#output\_helm\_nginx\_ingress\_status) | Status of the Ingress Helm release | +| [helm\_nvidia\_status](#output\_helm\_nvidia\_status) | Status of the Nvidia Helm release | +| [nginx\_ingress\_lb\_hostname](#output\_nginx\_ingress\_lb\_hostname) | Hostname of the nginx load balancer | +| [nginx\_ingress\_lb\_ips](#output\_nginx\_ingress\_lb\_ips) | IPs of the nginx load balancer | + + + +[Terraform]: https://www.terraform.io +[Issues]: https://github.com/anyscale/sa-sandbox-terraform/issues +[badge-build]: https://github.com/anyscale/sa-sandbox-terraform/workflows/CI/CD%20Pipeline/badge.svg +[badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform +[badge-tf-aws]: https://img.shields.io/badge/AWS-5.+-F8991D.svg?logo=terraform +[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions +[badge-opentofu]: https://img.shields.io/badge/opentofu-1.x%20-623CE4.svg?logo=terraform +[badge-tf-google]: https://img.shields.io/badge/Google-5.+-F8991D.svg?logo=terraform +[badge-tf-kubernetes]: https://img.shields.io/badge/KUBERNETES-2.+-F8991D.svg?logo=terraform diff --git a/modules/anyscale-k8s-helm/data.tf b/modules/anyscale-k8s-helm/data.tf new file mode 100644 index 0000000..fcd549b --- /dev/null +++ b/modules/anyscale-k8s-helm/data.tf @@ -0,0 +1,12 @@ +# AWS Data Sources +data "aws_caller_identity" "current" { + count = var.cloud_provider == "aws" ? 1 : 0 +} +data "aws_region" "current" { + count = var.cloud_provider == "aws" ? 1 : 0 +} + +# GCP Data Sources +# data "google_client_config" "current" { +# count = var.cloud_provider == "gcp" ? 
1 : 0 +# } diff --git a/anyscale-k8s-helm/helm-autoscaler.tf b/modules/anyscale-k8s-helm/helm-autoscaler.tf similarity index 69% rename from anyscale-k8s-helm/helm-autoscaler.tf rename to modules/anyscale-k8s-helm/helm-autoscaler.tf index 5dcf4d5..e845577 100644 --- a/anyscale-k8s-helm/helm-autoscaler.tf +++ b/modules/anyscale-k8s-helm/helm-autoscaler.tf @@ -2,7 +2,7 @@ # https://github.com/kubernetes/autoscaler resource "helm_release" "anyscale_cluster_autoscaler" { - count = local.module_enabled && var.cloud_provider == "aws" ? 1 : 0 + count = local.module_enabled && var.cloud_provider == "aws" && var.anyscale_cluster_autoscaler_chart.enabled ? 1 : 0 name = var.anyscale_cluster_autoscaler_chart.name repository = var.anyscale_cluster_autoscaler_chart.repository @@ -17,9 +17,17 @@ resource "helm_release" "anyscale_cluster_autoscaler" { value = var.kubernetes_cluster_name } - set { - name = "awsRegion" - value = data.aws_region.current[0].name + dynamic "set" { + for_each = var.cloud_provider == "aws" ? [ + { + name = "awsRegion" + value = data.aws_region.current[0].name + } + ] : [] + content { + name = set.value.name + value = set.value.value + } } dynamic "set" { diff --git a/modules/anyscale-k8s-helm/helm-ingress.tf b/modules/anyscale-k8s-helm/helm-ingress.tf new file mode 100644 index 0000000..5fe6a8f --- /dev/null +++ b/modules/anyscale-k8s-helm/helm-ingress.tf @@ -0,0 +1,78 @@ +# -------------------------------------------------------------------------------- +# Description: This file contains the terraform configuration to deploy the ingress controller using helm. +# -------------------------------------------------------------------------------- + +resource "kubernetes_namespace" "ingress_nginx" { + count = local.module_enabled && var.anyscale_ingress_chart.enabled ? 
1 : 0 + + metadata { + name = try(var.anyscale_ingress_chart.namespace, "ingress-nginx") + } + +} + +resource "helm_release" "nginx_ingress" { + count = local.module_enabled && var.anyscale_ingress_chart.enabled ? 1 : 0 + + name = var.anyscale_ingress_chart.name + repository = var.anyscale_ingress_chart.repository + chart = var.anyscale_ingress_chart.chart + namespace = kubernetes_namespace.ingress_nginx[0].metadata[0].name + version = var.anyscale_ingress_chart.chart_version + create_namespace = false + wait = false + + dynamic "set" { + for_each = var.anyscale_ingress_chart.values + content { + name = set.key + value = set.value + } + } + + dynamic "set" { + for_each = var.cloud_provider == "aws" ? [ + { + name = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/aws-load-balancer-type" + value = "nlb" + }, + { + name = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/aws-load-balancer-cross-zone-load-balancing-enabled" + value = "true" + } + ] : [] + content { + name = set.value["name"] + value = set.value["value"] + } + } + + dynamic "set" { + for_each = var.cloud_provider == "aws" && var.anyscale_ingress_aws_nlb_internal ? [ + { + name = "controller.service.annotations.service\\.beta\\.kubernetes\\.io/aws-load-balancer-internal" + value = "true" + } + ] : [] + content { + name = set.value.name + value = set.value.value + } + } + + depends_on = [ + kubernetes_namespace.ingress_nginx, + time_sleep.wait_helm_termination[0] + ] + + timeout = 600 + +} + +data "kubernetes_service" "nginx_ingress" { # Looked up only when the ingress release is created; it indexes helm_release.nginx_ingress[0] + count = local.module_enabled && var.anyscale_ingress_chart.enabled ? 
1 : 0 + metadata { + name = "${helm_release.nginx_ingress[0].name}-${helm_release.nginx_ingress[0].chart}-controller" + namespace = var.anyscale_ingress_chart.namespace + } +} diff --git a/modules/anyscale-k8s-helm/helm-metricsserver.tf b/modules/anyscale-k8s-helm/helm-metricsserver.tf new file mode 100644 index 0000000..56ef63e --- /dev/null +++ b/modules/anyscale-k8s-helm/helm-metricsserver.tf @@ -0,0 +1,20 @@ +# Description: This file contains the terraform configuration to deploy the metrics server helm chart. +resource "helm_release" "feature_metrics_server" { + count = local.module_enabled && var.anyscale_metrics_server_chart.enabled ? 1 : 0 + + name = var.anyscale_metrics_server_chart.name + repository = var.anyscale_metrics_server_chart.repository + chart = var.anyscale_metrics_server_chart.chart + namespace = var.anyscale_metrics_server_chart.namespace + version = var.anyscale_metrics_server_chart.chart_version + + create_namespace = true + + dynamic "set" { + for_each = var.anyscale_metrics_server_chart.values + content { + name = set.key + value = set.value + } + } +} diff --git a/modules/anyscale-k8s-helm/helm-nvidia.tf b/modules/anyscale-k8s-helm/helm-nvidia.tf new file mode 100644 index 0000000..a31bd6d --- /dev/null +++ b/modules/anyscale-k8s-helm/helm-nvidia.tf @@ -0,0 +1,20 @@ +# Description: This file contains the terraform configuration to deploy the NVIDIA device plugin helm chart. +resource "helm_release" "nvidia" { + count = local.module_enabled && var.anyscale_nvidia_device_plugin_chart.enabled ? 
1 : 0 + name = var.anyscale_nvidia_device_plugin_chart.name + repository = var.anyscale_nvidia_device_plugin_chart.repository + chart = var.anyscale_nvidia_device_plugin_chart.chart + namespace = var.anyscale_nvidia_device_plugin_chart.namespace + version = var.anyscale_nvidia_device_plugin_chart.chart_version + + create_namespace = true + + dynamic "set" { + for_each = var.anyscale_nvidia_device_plugin_chart.values + content { + name = set.key + value = set.value + } + } + +} diff --git a/modules/anyscale-k8s-helm/helm-prometheus.tf b/modules/anyscale-k8s-helm/helm-prometheus.tf new file mode 100644 index 0000000..57d7404 --- /dev/null +++ b/modules/anyscale-k8s-helm/helm-prometheus.tf @@ -0,0 +1,22 @@ +# Description: This file contains the terraform configuration to deploy the prometheus helm chart. +resource "helm_release" "prometheus" { + count = local.module_enabled && var.anyscale_prometheus_chart.enabled ? 1 : 0 + + name = var.anyscale_prometheus_chart.name + repository = var.anyscale_prometheus_chart.repository + chart = var.anyscale_prometheus_chart.chart + namespace = var.anyscale_prometheus_chart.namespace + version = var.anyscale_prometheus_chart.chart_version + + create_namespace = true + + dynamic "set" { + for_each = var.anyscale_prometheus_chart.values + content { + name = set.key + value = set.value + } + } + + timeout = 900 +} diff --git a/modules/anyscale-k8s-helm/main.tf b/modules/anyscale-k8s-helm/main.tf new file mode 100644 index 0000000..a333d50 --- /dev/null +++ b/modules/anyscale-k8s-helm/main.tf @@ -0,0 +1,12 @@ +locals { + module_enabled = var.module_enabled + helm_termination_grace_period_seconds = 300 # 5 minutes to allow connection draining +} + +# Helm chart destruction will return immediately, we need to wait until the pods are fully evicted +# https://github.com/hashicorp/terraform-provider-helm/issues/593 +resource "time_sleep" "wait_helm_termination" { + count = local.module_enabled ? 
1 : 0 + + destroy_duration = "${local.helm_termination_grace_period_seconds}s" +} diff --git a/modules/anyscale-k8s-helm/outputs.tf b/modules/anyscale-k8s-helm/outputs.tf new file mode 100644 index 0000000..7055536 --- /dev/null +++ b/modules/anyscale-k8s-helm/outputs.tf @@ -0,0 +1,24 @@ +output "nginx_ingress_lb_hostname" { + description = "Hostname of the nginx load balancer" + value = try(data.kubernetes_service.nginx_ingress[0].status[0].load_balancer[0].ingress[*].hostname, []) +} + +output "nginx_ingress_lb_ips" { + description = "IPs of the nginx load balancer" + value = try(data.kubernetes_service.nginx_ingress[0].status[0].load_balancer[0].ingress[*].ip, []) +} + +output "helm_nginx_ingress_status" { + description = "Status of the Ingress Helm release" + value = try(helm_release.nginx_ingress[0].status, "") +} + +output "helm_nvidia_status" { + description = "Status of the Nvidia Helm release" + value = try(helm_release.nvidia[0].status, "") +} + +output "helm_autoscaler_status" { + description = "Status of the Cluster Autoscaler Helm release" + value = try(helm_release.anyscale_cluster_autoscaler[0].status, "") +} diff --git a/anyscale-k8s-helm/test/README.md b/modules/anyscale-k8s-helm/test/README.md similarity index 99% rename from anyscale-k8s-helm/test/README.md rename to modules/anyscale-k8s-helm/test/README.md index 83ed867..9912858 100644 --- a/anyscale-k8s-helm/test/README.md +++ b/modules/anyscale-k8s-helm/test/README.md @@ -2,4 +2,3 @@ The `anyscale-k8s-helm` module is cloud agnostic. There are tests for both AWS and GCP as subfolders. 
- diff --git a/modules/anyscale-k8s-helm/test/anyscale-aws-test/main.tf b/modules/anyscale-k8s-helm/test/anyscale-aws-test/main.tf new file mode 100644 index 0000000..650999e --- /dev/null +++ b/modules/anyscale-k8s-helm/test/anyscale-aws-test/main.tf @@ -0,0 +1,212 @@ +# --------------------------------------------------------------------------------------------------------------------- +# CREATE Anyscale K8s Helm Resources +# This template creates EKS resources for Anyscale +# Requires: +# - VPC +# - Security Group +# - IAM Roles +# - EKS Cluster +# - EKS Nodegroups +# --------------------------------------------------------------------------------------------------------------------- +locals { + # azs = slice(data.aws_availability_zones.available.names, 0, 3) + + full_tags = merge(tomap({ + anyscale-cloud-id = var.anyscale_cloud_id, + anyscale-deploy-environment = var.anyscale_deploy_env + }), + var.tags + ) +} + +# --------------------------------------------------------------------------------------------------------------------- +# Create resources for EKS TF Module +# Creates a VPC +# Creates a Security Group +# Creates IAM Roles +# --------------------------------------------------------------------------------------------------------------------- +locals { + public_subnets = ["172.24.101.0/24", "172.24.102.0/24", "172.24.103.0/24"] + private_subnets = ["172.24.20.0/24", "172.24.21.0/24", "172.24.22.0/24"] +} +module "eks_vpc" { + #checkov:skip=CKV_TF_1: Test code should use the latest version of the module + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-vpc" + + anyscale_vpc_name = "tftest-k8s-helm" + cidr_block = "172.24.0.0/16" + + public_subnets = local.public_subnets + private_subnets = local.private_subnets +} +locals { + # Because subnet ID may not be known at plan time, we cannot use it as a key + anyscale_subnet_count = length(local.private_subnets) +} + +module "eks_securitygroup" { + 
#checkov:skip=CKV_TF_1: Test code should use the latest version of the module + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-securitygroups" + + vpc_id = module.eks_vpc.vpc_id + + security_group_name_prefix = "tftest-k8s-helm-" + + ingress_with_self = [ + { rule = "all-all" } + ] +} + +module "eks_iam_roles" { + #checkov:skip=CKV_TF_1: Test code should use the latest version of the module + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-iam" + + module_enabled = true + create_anyscale_access_role = false + create_cluster_node_instance_profile = false + create_iam_s3_policy = false + + create_anyscale_eks_cluster_role = true + anyscale_eks_cluster_role_name = "tftest-k8s-helm-cluster" + create_anyscale_eks_node_role = true + anyscale_eks_node_role_name = "tftest-k8s-helm-node-role" + + tags = local.full_tags +} + +locals { + coredns_config = jsonencode({ + affinity = { + nodeAffinity = { + requiredDuringSchedulingIgnoredDuringExecution = { + nodeSelectorTerms = [ + { + matchExpressions = [ + { + key = "node-type" + operator = "In" + values = ["management"] + } + ] + } + ] + } + } + }, + nodeSelector = { + "node-type" = "management" + }, + tolerations = [ + { + key = "CriticalAddonsOnly" + operator = "Exists" + }, + { + effect = "NoSchedule" + key = "node-role.kubernetes.io/control-plane" + } + ], + replicaCount = 2 + }) +} + +module "eks_cluster" { + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-cluster" + + module_enabled = true + + anyscale_subnet_ids = module.eks_vpc.public_subnet_ids + anyscale_subnet_count = local.anyscale_subnet_count + anyscale_security_group_id = module.eks_securitygroup.security_group_id + eks_role_arn = module.eks_iam_roles.iam_anyscale_eks_cluster_role_arn + anyscale_eks_name = "tftest-k8s-helm" + + eks_addons = [ + { + addon_name = "coredns" + addon_version = "v1.11.1-eksbuild.8" + 
configuration_values = local.coredns_config + } + ] + eks_addons_depends_on = module.eks_nodegroups + + tags = local.full_tags + + depends_on = [module.eks_iam_roles, module.eks_vpc, module.eks_securitygroup] +} + +module "eks_nodegroups" { + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-nodegroups" + + module_enabled = true + + eks_node_role_arn = module.eks_iam_roles.iam_anyscale_eks_node_role_arn + eks_cluster_name = module.eks_cluster.eks_cluster_name + subnet_ids = module.eks_vpc.public_subnet_ids + + tags = local.full_tags +} + +# --------------------------------------------------------------------------------------------------------------------- +# Create Helm Resources with no optional parameters +# --------------------------------------------------------------------------------------------------------------------- +module "all_defaults" { + source = "../../" + + module_enabled = true + cloud_provider = "aws" + + kubernetes_cluster_name = module.eks_cluster.eks_cluster_name + + depends_on = [module.eks_nodegroups] +} + +# --------------------------------------------------------------------------------------------------------------------- +# Create Helm Resources with as many optional parameters as possible - not currently working. 
+# --------------------------------------------------------------------------------------------------------------------- +# module "kitchen_sink" { +# source = "../../" + +# module_enabled = true +# cloud_provider = "aws" + +# kubernetes_cluster_name = module.eks_cluster.eks_cluster_name + +# anyscale_cluster_autoscaler_chart = { +# enabled = false +# } +# anyscale_nvidia_device_plugin_chart = { +# enabled = false +# } + +# anyscale_metrics_server_chart = { +# enabled = false +# } + +# anyscale_ingress_chart = { +# enabled = true +# name = "kitchensink-ingress" +# repository = "https://kubernetes.github.io/ingress-nginx" +# chart = "ingress-nginx" +# chart_version = "4.11.1" +# namespace = "kitchensink-ingress-nginx" +# values = { +# "controller.service.type" = "LoadBalancer" +# "controller.allowSnippetAnnotations" = "true" +# "controller.autoscaling.enabled" = "true" +# } +# } +# anyscale_ingress_aws_nlb_internal = true + +# depends_on = [module.eks_nodegroups] +# } + +# --------------------------------------------------------------------------------------------------------------------- +# Do not create any resources +# --------------------------------------------------------------------------------------------------------------------- +module "test_no_resources" { + source = "../.." 
+ + module_enabled = false + cloud_provider = "aws" +} diff --git a/anyscale-k8s-helm/test/anyscale-aws-test/outputs.tf b/modules/anyscale-k8s-helm/test/anyscale-aws-test/outputs.tf similarity index 100% rename from anyscale-k8s-helm/test/anyscale-aws-test/outputs.tf rename to modules/anyscale-k8s-helm/test/anyscale-aws-test/outputs.tf diff --git a/modules/anyscale-k8s-helm/test/anyscale-aws-test/variables.tf b/modules/anyscale-k8s-helm/test/anyscale-aws-test/variables.tf new file mode 100644 index 0000000..9393991 --- /dev/null +++ b/modules/anyscale-k8s-helm/test/anyscale-aws-test/variables.tf @@ -0,0 +1,58 @@ +# --------------------------------------------------------------------------------------------------------------------- +# ENVIRONMENT VARIABLES +# Define these secrets as environment variables +# --------------------------------------------------------------------------------------------------------------------- + +# AWS_ACCESS_KEY_ID +# AWS_SECRET_ACCESS_KEY + +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED VARIABLES +# These variables must be set when using this module. +# --------------------------------------------------------------------------------------------------------------------- + +variable "aws_region" { + description = "The AWS region in which all resources will be created." + type = string + default = "us-east-2" +} + +variable "anyscale_cloud_id" { + description = "(Optional) Anyscale Cloud ID. Default is `null`." + type = string + default = null + validation { + condition = ( + var.anyscale_cloud_id == null ? true : ( + length(var.anyscale_cloud_id) > 4 && + substr(var.anyscale_cloud_id, 0, 4) == "cld_" + ) + ) + error_message = "The anyscale_cloud_id value must start with \"cld_\"." + } +} + +# ------------------------------------------------------------------------------ +# OPTIONAL PARAMETERS +# These variables have defaults, but may be overridden. 
+# ------------------------------------------------------------------------------ +variable "anyscale_deploy_env" { + description = "(Optional) Anyscale deploy environment. Used in resource names and tags." + type = string + default = "production" + validation { + condition = ( + var.anyscale_deploy_env == "production" || var.anyscale_deploy_env == "development" || var.anyscale_deploy_env == "test" + ) + error_message = "The anyscale_deploy_env only allows `production`, `test`, or `development`" + } +} + +variable "tags" { + description = "(Optional) A map of tags to all resources that accept tags." + type = map(string) + default = { + "test" : true, + "environment" : "test" + } +} diff --git a/anyscale-k8s-helm/test/anyscale-aws-test/versions.tf b/modules/anyscale-k8s-helm/test/anyscale-aws-test/versions.tf similarity index 69% rename from anyscale-k8s-helm/test/anyscale-aws-test/versions.tf rename to modules/anyscale-k8s-helm/test/anyscale-aws-test/versions.tf index 47701d0..f7f58d0 100644 --- a/anyscale-k8s-helm/test/anyscale-aws-test/versions.tf +++ b/modules/anyscale-k8s-helm/test/anyscale-aws-test/versions.tf @@ -33,6 +33,17 @@ provider "helm" { } } +provider "kubernetes" { + host = module.eks_cluster.eks_kubeconfig.endpoint + cluster_ca_certificate = base64decode(module.eks_cluster.eks_kubeconfig.cluster_ca_certificate) + + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", module.eks_cluster.eks_cluster_name] + command = "aws" + } +} + provider "aws" { region = var.aws_region } diff --git a/anyscale-k8s-helm/test/anyscale-gcp-test/versions.tf b/modules/anyscale-k8s-helm/test/anyscale-gcp-test/versions.tf similarity index 100% rename from anyscale-k8s-helm/test/anyscale-gcp-test/versions.tf rename to modules/anyscale-k8s-helm/test/anyscale-gcp-test/versions.tf diff --git a/modules/anyscale-k8s-helm/variables.tf b/modules/anyscale-k8s-helm/variables.tf new file mode 100644 index 0000000..62ab5cd --- 
/dev/null +++ b/modules/anyscale-k8s-helm/variables.tf @@ -0,0 +1,359 @@ +# ------------------------------------------------------------------------------ +# REQUIRED PARAMETERS +# These variables must be set when using this module. +# ------------------------------------------------------------------------------ +variable "cloud_provider" { + description = <<-EOT + (Required) The cloud provider (aws or gcp) + + ex: + ``` + cloud_provider = "aws" + ``` + EOT + type = string + validation { + condition = ( + var.cloud_provider == "aws" || var.cloud_provider == "gcp" + ) + error_message = "The cloud_provider only allows `aws` or `gcp`" + } +} + +variable "kubernetes_cluster_name" { + type = string + description = <<-EOT + (Optional) The name of the Kubernetes cluster. + + ex: + ``` + kubernetes_cluster_name = "my-cluster" + ``` + EOT + default = null +} + +# ------------------------------------------------------------------------------ +# OPTIONAL PARAMETERS +# These variables have defaults, but may be overridden. +# ------------------------------------------------------------------------------ +variable "module_enabled" { + description = <<-EOT + (Optional) Determines if this module should create resources. + + If set to true, `eks_role_arn`, `anyscale_subnet_ids`, and `anyscale_security_group_id` must be provided. + ex: + ``` + module_enabled = true + ``` + EOT + type = bool + default = false +} + + +# variable "anyscale_node_tolerations" { +# description = <<-EOT +# (Optional) List of tolerations to apply to helm charts that need to run on Anyscale Nodes. 
+ +# ex: +# ``` +# anyscale_node_tolerations = [ +# { +# key = "node.anyscale.com/capacity-type" +# operator = "Equal" +# value = "ANY" +# effect = "NoSchedule" +# }, +# { +# key = "node.anyscale.com/accelerator-type" +# operator = "Equal" +# value = "GPU" +# effect = "NoSchedule" +# } +# ] +# ``` +# EOT +# type = list( +# object({ +# key = string +# operator = string +# value = string +# effect = string +# }) +# ) +# default = [ +# { +# key = "node.anyscale.com/capacity-type" +# operator = "Equal" +# value = "ANY" +# effect = "NoSchedule" +# }, +# { +# key = "node.anyscale.com/accelerator-type" +# operator = "Equal" +# value = "GPU" +# effect = "NoSchedule" +# } +# ] +# } + +# ------------------------------------------------------------------------------ +# Helm Chart Variables +# ------------------------------------------------------------------------------ +variable "anyscale_cluster_autoscaler_chart" { + description = <<-EOT + (Optional) The Helm chart to install the Cluster Autoscaler. + + ex: + ``` + anyscale_cluster_autoscaler_chart = { + enabled = true + name = "cluster-autoscaler" + repository = "https://kubernetes.github.io/autoscaler" + chart = "cluster-autoscaler" + chart_version = "9.37.0" + namespace = "kube-system" + values = { + "some.other.config" = "value" + } + } + ``` + EOT + type = object({ + enabled = bool + name = optional(string) + repository = optional(string) + chart = optional(string) + chart_version = optional(string) + namespace = optional(string) + values = optional(map(string)) + }) + default = { + enabled = true + name = "cluster-autoscaler" + repository = "https://kubernetes.github.io/autoscaler" + chart = "cluster-autoscaler" + chart_version = "9.37.0" + namespace = "kube-system" + values = {} + } +} + +variable "anyscale_ingress_chart" { + description = <<-EOT + (Optional) The Helm chart to install the Cluster Ingress. 
+ + ex: + ``` + anyscale_ingress_chart = { + enabled = true + name = "anyscale-ingress" + repository = "https://kubernetes.github.io/ingress-nginx" + chart = "ingress-nginx" + chart_version = "4.11.1" + namespace = "ingress-nginx" + values = { + "some.other.config" = "value" + } + } + ``` + EOT + type = object({ + enabled = bool + name = optional(string) + repository = optional(string) + chart = optional(string) + chart_version = optional(string) + namespace = optional(string) + values = optional(map(string)) + }) + default = { + enabled = true + name = "anyscale-ingress" + repository = "https://kubernetes.github.io/ingress-nginx" + chart = "ingress-nginx" + chart_version = "4.11.1" + namespace = "ingress-nginx" + values = { + "controller.service.type" = "LoadBalancer" + "controller.allowSnippetAnnotations" = "true" + "controller.autoscaling.enabled" = "true" + } + } +} + +variable "anyscale_ingress_aws_nlb_internal" { + description = <<-EOT + (Optional) Determines if the AWS NLB should be internal. + + Requires `cloud_provider` to be set to `aws`. + Requires `anyscale_ingress_chart` to be enabled. + + ex: + ``` + anyscale_ingress_aws_nlb_internal = true + ``` + EOT + type = bool + default = false +} + +variable "anyscale_nvidia_device_plugin_chart" { + description = <<-EOT + (Optional) The Helm chart to install the NVIDIA Device Plugin. 
+ + Valid settings can be found in the [nvidia documentation](https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#deploying-with-gpu-feature-discovery-for-automatic-node-labels) + + ex: + ``` + anyscale_nvidia_device_plugin_chart = { + enabled = true + name = "nvidia-device-plugin" + repository = "https://nvidia.github.io/k8s-device-plugin" + chart = "nvidia-device-plugin" + chart_version = "0.16.2" + namespace = "nvidia-device-plugin" + values = { + "some.other.config" = "value" + } + } + ``` + EOT + type = object({ + enabled = bool + name = optional(string) + repository = optional(string) + chart = optional(string) + chart_version = optional(string) + namespace = optional(string) + values = optional(map(string)) + }) + default = { + enabled = true + name = "anyscale-nvidia-device-plugin" + repository = "https://nvidia.github.io/k8s-device-plugin" + chart = "nvidia-device-plugin" + chart_version = "0.16.2" + namespace = "nvidia-device-plugin" + values = { + "gfd.enabled" = "true", + "priorityClassName" = "system-node-critical" + + "nfd.worker.tolerations[0].key" = "node-role.kubernetes.io/master" + "nfd.worker.tolerations[0].operator" = "Equal" + "nfd.worker.tolerations[0].value" = "" + "nfd.worker.tolerations[0].effect" = "NoSchedule" + + "nfd.worker.tolerations[1].key" = "nvidia.com/gpu" + "nfd.worker.tolerations[1].operator" = "Equal" + "nfd.worker.tolerations[1].value" = "present" + "nfd.worker.tolerations[1].effect" = "NoSchedule" + + "nfd.worker.tolerations[2].key" = "node.anyscale.com/accelerator-type" + "nfd.worker.tolerations[2].operator" = "Equal" + "nfd.worker.tolerations[2].value" = "GPU" + "nfd.worker.tolerations[2].effect" = "NoSchedule" + + "nfd.worker.tolerations[3].key" = "node.anyscale.com/capacity-type" + "nfd.worker.tolerations[3].operator" = "Equal" + "nfd.worker.tolerations[3].value" = "ANY" + "nfd.worker.tolerations[3].effect" = "NoSchedule" + + "tolerations[0].key" = "nvidia.com/gpu" + "tolerations[0].operator" = "Equal" + 
"tolerations[0].value" = "present" + "tolerations[0].effect" = "NoSchedule" + + "tolerations[1].key" = "node.anyscale.com/accelerator-type" + "tolerations[1].operator" = "Equal" + "tolerations[1].value" = "GPU" + "tolerations[1].effect" = "NoSchedule" + + "tolerations[2].key" = "node.anyscale.com/capacity-type" + "tolerations[2].operator" = "Equal" + "tolerations[2].value" = "ANY" + "tolerations[2].effect" = "NoSchedule" + } + } +} + +variable "anyscale_metrics_server_chart" { + description = <<-EOT + (Optional) The Helm chart to install the Metrics Server. + + Required for the Anyscale Autoscaler to function. + + ex: + ``` + anyscale_metrics_server_chart = { + enabled = true + name = "metrics-server" + repository = "https://kubernetes-sigs.github.io/metrics-server/" + chart = "metrics-server" + chart_version = "3.12.1" + namespace = "metrics-server" + values = { + "some.other.config" = "value" + } + } + ``` + EOT + type = object({ + enabled = bool + name = optional(string) + repository = optional(string) + chart = optional(string) + chart_version = optional(string) + namespace = optional(string) + values = optional(map(string)) + }) + default = { + enabled = true + name = "metrics-server" + repository = "https://kubernetes-sigs.github.io/metrics-server/" + chart = "metrics-server" + chart_version = "3.12.1" + namespace = "metrics-server" + values = {} + } +} + +variable "anyscale_prometheus_chart" { + description = <<-EOT + (Optional) The Helm chart to install Prometheus. 
+ + ex: + ``` + anyscale_prometheus_chart = { + enabled = true + name = "prometheus" + repository = "https://prometheus-community.github.io/helm-charts" + chart = "prometheus" + chart_version = "16.0.0" + namespace = "prometheus" + values = { + "some.other.config" = "value" + } + } + ``` + EOT + type = object({ + enabled = bool + name = optional(string) + repository = optional(string) + chart = optional(string) + chart_version = optional(string) + namespace = optional(string) + values = optional(map(string)) + }) + default = { + enabled = false + name = "prometheus" + repository = "https://prometheus-community.github.io/helm-charts" + chart = "prometheus" + chart_version = "25.26.0" + namespace = "prometheus" + values = {} + } +} diff --git a/anyscale-k8s-helm/versions.tf b/modules/anyscale-k8s-helm/versions.tf similarity index 61% rename from anyscale-k8s-helm/versions.tf rename to modules/anyscale-k8s-helm/versions.tf index 071177d..8a43595 100644 --- a/anyscale-k8s-helm/versions.tf +++ b/modules/anyscale-k8s-helm/versions.tf @@ -2,16 +2,6 @@ terraform { required_version = ">= 1.0" required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 5.0" - } - - google = { - source = "hashicorp/google" - version = "~> 5.0" - } - helm = { source = "hashicorp/helm" version = "~> 2.0" @@ -21,6 +11,10 @@ terraform { source = "hashicorp/kubernetes" version = "~> 2.0" } + + time = { + source = "hashicorp/time" + version = ">= 0.12" + } } } - diff --git a/anyscale-k8s-helm/README.md b/modules/anyscale-k8s-namespace/README.md similarity index 51% rename from anyscale-k8s-helm/README.md rename to modules/anyscale-k8s-namespace/README.md index 887022a..dfb542a 100644 --- a/anyscale-k8s-helm/README.md +++ b/modules/anyscale-k8s-namespace/README.md @@ -1,9 +1,15 @@ [![Build Status][badge-build]][build-status] [![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases) +[![OpenTofu 
Version][badge-opentofu]](https://github.com/opentofu/opentofu/releases) +[![Kubernetes Provider Version][badge-tf-kubernetes]](https://github.com/terraform-providers/terraform-provider-kubernetes/releases) [![AWS Provider Version][badge-tf-aws]](https://github.com/terraform-providers/terraform-provider-aws/releases) +[![Google Provider Version][badge-tf-google]](https://github.com/terraform-providers/terraform-provider-google/releases) -# anyscale-k8s-helm -This module creates Kubernetes helm charts for Anyscale applications and workloads. +# anyscale-k8s-namespace + +This module creates a Kubernetes Namespace for Anyscale. + +The Anyscale Namespace can also be created via the Anyscale Helm Chart. @@ -14,4 +20,7 @@ This module creates Kubernetes helm charts for Anyscale applications and workloa [badge-build]: https://github.com/anyscale/sa-sandbox-terraform/workflows/CI/CD%20Pipeline/badge.svg [badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform [badge-tf-aws]: https://img.shields.io/badge/AWS-5.+-F8991D.svg?logo=terraform -[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions \ No newline at end of file +[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions +[badge-opentofu]: https://img.shields.io/badge/opentofu-1.x%20-623CE4.svg?logo=terraform +[badge-tf-google]: https://img.shields.io/badge/Google-5.+-F8991D.svg?logo=terraform +[badge-tf-kubernetes]: https://img.shields.io/badge/KUBERNETES-2.+-F8991D.svg?logo=terraform diff --git a/modules/anyscale-k8s-namespace/main.tf b/modules/anyscale-k8s-namespace/main.tf new file mode 100644 index 0000000..f58d6a3 --- /dev/null +++ b/modules/anyscale-k8s-namespace/main.tf @@ -0,0 +1,10 @@ +locals { + module_enabled = var.module_enabled +} + +resource "kubernetes_namespace" "anyscale" { + count = local.module_enabled ? 
1 : 0 + metadata { + name = var.anyscale_kubernetes_namespace + } +} diff --git a/modules/anyscale-k8s-namespace/outputs.tf b/modules/anyscale-k8s-namespace/outputs.tf new file mode 100644 index 0000000..6228972 --- /dev/null +++ b/modules/anyscale-k8s-namespace/outputs.tf @@ -0,0 +1,4 @@ +output "anyscale_kubernetes_namespace_name" { + description = "The name of the Kubernetes namespace." + value = try(kubernetes_namespace.anyscale[0].metadata[0].name, "") +} diff --git a/anyscale-k8s-helm/test/anyscale-aws-test/main.tf b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/main.tf similarity index 53% rename from anyscale-k8s-helm/test/anyscale-aws-test/main.tf rename to modules/anyscale-k8s-namespace/test/anyscale-aws-test/main.tf index b56e2c6..1507037 100644 --- a/anyscale-k8s-helm/test/anyscale-aws-test/main.tf +++ b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/main.tf @@ -1,5 +1,5 @@ # --------------------------------------------------------------------------------------------------------------------- -# CREATE Anyscale K8s Helm Resources +# CREATE Anyscale K8s Namespace Resources # This template creates EKS resources for Anyscale # Requires: # - VPC @@ -29,9 +29,10 @@ locals { private_subnets = ["172.24.20.0/24", "172.24.21.0/24", "172.24.22.0/24"] } module "eks_vpc" { - source = "../../../aws-anyscale-vpc" + #checkov:skip=CKV_TF_1: Test code should use the latest version of the module + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-vpc" - anyscale_vpc_name = "anyscale-tftest-eks" + anyscale_vpc_name = "tftest-k8s-namespace" cidr_block = "172.24.0.0/16" public_subnets = local.public_subnets @@ -43,11 +44,12 @@ locals { } module "eks_securitygroup" { - source = "../../../aws-anyscale-securitygroups" + #checkov:skip=CKV_TF_1: Test code should use the latest version of the module + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-securitygroups" vpc_id = 
module.eks_vpc.vpc_id - security_group_name_prefix = "anyscale-tftest-eks-" + security_group_name_prefix = "tftest-k8s-namespace-" ingress_with_self = [ { rule = "all-all" } @@ -55,23 +57,61 @@ module "eks_securitygroup" { } module "eks_iam_roles" { - source = "../../../aws-anyscale-iam" + #checkov:skip=CKV_TF_1: Test code should use the latest version of the module + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-iam" module_enabled = true - create_anyscale_access_role = false + create_anyscale_access_role = true + anyscale_access_role_name = "tftest-k8s-namespace-controlplane-role" create_cluster_node_instance_profile = false create_iam_s3_policy = false create_anyscale_eks_cluster_role = true - anyscale_eks_cluster_role_name = "anyscale-tftest-eks-cluster-role" + anyscale_eks_cluster_role_name = "tftest-k8s-namespace-cluster-role" create_anyscale_eks_node_role = true - anyscale_eks_node_role_name = "anyscale-tftest-eks-node-role" + anyscale_eks_node_role_name = "tftest-k8s-namespace-node-role" tags = local.full_tags } +locals { + coredns_config = jsonencode({ + affinity = { + nodeAffinity = { + requiredDuringSchedulingIgnoredDuringExecution = { + nodeSelectorTerms = [ + { + matchExpressions = [ + { + key = "node-type" + operator = "In" + values = ["management"] + } + ] + } + ] + } + } + }, + nodeSelector = { + "node-type" = "management" + }, + tolerations = [ + { + key = "CriticalAddonsOnly" + operator = "Exists" + }, + { + effect = "NoSchedule" + key = "node-role.kubernetes.io/control-plane" + } + ], + replicaCount = 2 + }) +} + module "eks_cluster" { - source = "../../../aws-anyscale-eks-cluster" + source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-cluster" module_enabled = true @@ -79,12 +119,16 @@ module "eks_cluster" { anyscale_subnet_count = local.anyscale_subnet_count anyscale_security_group_id = module.eks_securitygroup.security_group_id eks_role_arn = 
module.eks_iam_roles.iam_anyscale_eks_cluster_role_arn + anyscale_eks_name = "tftest-k8s-namespace" tags = local.full_tags + + depends_on = [module.eks_iam_roles, module.eks_vpc, module.eks_securitygroup] } + # --------------------------------------------------------------------------------------------------------------------- -# Create Helm Resources with no optional parameters +# Create Resources with no optional parameters # --------------------------------------------------------------------------------------------------------------------- module "all_defaults" { source = "../../" @@ -92,9 +136,22 @@ module "all_defaults" { module_enabled = true cloud_provider = "aws" - kubernetes_cluster_name = module.eks_cluster.eks_cluster_name - kubernetes_endpoint_address = module.eks_cluster.eks_cluster_endpoint - kubernetes_cluster_ca_data = module.eks_cluster.eks_cluster_ca_data + depends_on = [module.eks_cluster] + +} + +# --------------------------------------------------------------------------------------------------------------------- +# Create Resources with as many optional parameters as possible +# --------------------------------------------------------------------------------------------------------------------- +module "kitchen_sink" { + source = "../../" + + module_enabled = true + cloud_provider = "aws" + + anyscale_kubernetes_namespace = "tftest-k8s-namespace" + depends_on = [module.eks_cluster] + } # --------------------------------------------------------------------------------------------------------------------- diff --git a/modules/anyscale-k8s-namespace/test/anyscale-aws-test/outputs.tf b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/outputs.tf new file mode 100644 index 0000000..6db4254 --- /dev/null +++ b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/outputs.tf @@ -0,0 +1,23 @@ +# -------------- +# Defaults Test +# -------------- +output "all_defaults_resources" { + description = "The resources of the All Defaults test" + value = 
module.all_defaults +} + +# ------------------ +# Kitchen Sink Test +# ------------------ +output "kitchen_sink_resources" { + description = "The resources of the Kitchen Sink test" + value = module.kitchen_sink +} + +# ----------------- +# No resource test +# ----------------- +output "test_no_resources" { + description = "The outputs of the no_resource resource - should all be empty" + value = module.test_no_resources +} diff --git a/modules/anyscale-k8s-namespace/test/anyscale-aws-test/variables.tf b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/variables.tf new file mode 100644 index 0000000..9393991 --- /dev/null +++ b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/variables.tf @@ -0,0 +1,58 @@ +# --------------------------------------------------------------------------------------------------------------------- +# ENVIRONMENT VARIABLES +# Define these secrets as environment variables +# --------------------------------------------------------------------------------------------------------------------- + +# AWS_ACCESS_KEY_ID +# AWS_SECRET_ACCESS_KEY + +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED VARIABLES +# These variables must be set when using this module. +# --------------------------------------------------------------------------------------------------------------------- + +variable "aws_region" { + description = "The AWS region in which all resources will be created." + type = string + default = "us-east-2" +} + +variable "anyscale_cloud_id" { + description = "(Optional) Anyscale Cloud ID. Default is `null`." + type = string + default = null + validation { + condition = ( + var.anyscale_cloud_id == null ? true : ( + length(var.anyscale_cloud_id) > 4 && + substr(var.anyscale_cloud_id, 0, 4) == "cld_" + ) + ) + error_message = "The anyscale_cloud_id value must start with \"cld_\"." 
+ } +} + +# ------------------------------------------------------------------------------ +# OPTIONAL PARAMETERS +# These variables have defaults, but may be overridden. +# ------------------------------------------------------------------------------ +variable "anyscale_deploy_env" { + description = "(Optional) Anyscale deploy environment. Used in resource names and tags." + type = string + default = "production" + validation { + condition = ( + var.anyscale_deploy_env == "production" || var.anyscale_deploy_env == "development" || var.anyscale_deploy_env == "test" + ) + error_message = "The anyscale_deploy_env only allows `production`, `test`, or `development`" + } +} + +variable "tags" { + description = "(Optional) A map of tags to all resources that accept tags." + type = map(string) + default = { + "test" : true, + "environment" : "test" + } +} diff --git a/modules/anyscale-k8s-namespace/test/anyscale-aws-test/versions.tf b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/versions.tf new file mode 100644 index 0000000..c46128a --- /dev/null +++ b/modules/anyscale-k8s-namespace/test/anyscale-aws-test/versions.tf @@ -0,0 +1,31 @@ +terraform { + required_version = ">= 1.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } + } +} + + + +provider "kubernetes" { + host = module.eks_cluster.eks_kubeconfig.endpoint + cluster_ca_certificate = base64decode(module.eks_cluster.eks_kubeconfig.cluster_ca_certificate) + + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", module.eks_cluster.eks_cluster_name] + command = "aws" + } +} + +provider "aws" { + region = var.aws_region +} diff --git a/modules/anyscale-k8s-namespace/variables.tf b/modules/anyscale-k8s-namespace/variables.tf new file mode 100644 index 0000000..24b1438 --- /dev/null +++ b/modules/anyscale-k8s-namespace/variables.tf @@ -0,0 +1,65 
@@
+# ------------------------------------------------------------------------------
+# REQUIRED PARAMETERS
+# These variables must be set when using this module.
+# ------------------------------------------------------------------------------
+variable "cloud_provider" {
+  description = <<-EOT
+    (Required) The cloud provider (aws or gcp)
+
+    ex:
+    ```
+    cloud_provider = "aws"
+    ```
+  EOT
+  type        = string
+  validation { # only `aws` and `gcp` pass
+    condition = (
+      var.cloud_provider == "aws" || var.cloud_provider == "gcp"
+    )
+    error_message = "The cloud_provider only allows `aws` or `gcp`." # must end with a period for Terraform 1.0/1.1
+  }
+}
+
+variable "kubernetes_cluster_name" { # optional (defaults to null) even though it sits in the REQUIRED section
+  type        = string
+  description = <<-EOT
+    (Optional) The name of the Kubernetes cluster.
+
+    ex:
+    ```
+    kubernetes_cluster_name = "my-cluster"
+    ```
+  EOT
+  default     = null
+}
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+variable "module_enabled" {
+  description = <<-EOT
+    (Optional) Determines if this module should create resources.
+
+    When set to false, this module creates no resources.
+    ex:
+    ```
+    module_enabled = true
+    ```
+  EOT
+  type        = bool
+  default     = true
+}
+
+variable "anyscale_kubernetes_namespace" {
+  description = <<-EOT
+    (Optional) The name of the Kubernetes namespace.
+
+    ex:
+    ```
+    anyscale_kubernetes_namespace = "anyscale-k8s"
+    ```
+  EOT
+  type        = string
+  default     = "anyscale-k8s"
+}
diff --git a/modules/anyscale-k8s-namespace/versions.tf b/modules/anyscale-k8s-namespace/versions.tf
new file mode 100644
index 0000000..94019cd
--- /dev/null
+++ b/modules/anyscale-k8s-namespace/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0" # minimum Terraform CLI version accepted by this module
+
+  required_providers {
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0" # any 2.x release of the provider
+    }
+  }
+}
diff --git a/modules/anyscale-k8s-persistent-volume/README.md b/modules/anyscale-k8s-persistent-volume/README.md
new file mode 100644
index 0000000..2b7207b
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/README.md
@@ -0,0 +1,75 @@
+[![Build Status][badge-build]][build-status]
+[![Terraform Version][badge-terraform]](https://github.com/hashicorp/terraform/releases)
+[![OpenTofu Version][badge-opentofu]](https://github.com/opentofu/opentofu/releases)
+[![Kubernetes Provider Version][badge-tf-kubernetes]](https://github.com/terraform-providers/terraform-provider-kubernetes/releases)
+[![AWS Provider Version][badge-tf-aws]](https://github.com/terraform-providers/terraform-provider-aws/releases)
+[![Google Provider Version][badge-tf-google]](https://github.com/terraform-providers/terraform-provider-google/releases)
+
+# anyscale-k8s-persistent-volume - UNUSED
+
+!!! Unused sub-module !!!
+
+This module creates the resources for a persistent volume NFS mount and persistent volume claim.
+
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.0 |
+| [kubernetes](#requirement\_kubernetes) | ~> 2.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [kubernetes](#provider\_kubernetes) | 2.32.0 |
+
+## Modules
+
+No modules.
+ +## Resources + +| Name | Type | +|------|------| +| [kubernetes_persistent_volume.anyscale](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/persistent_volume) | resource | +| [kubernetes_persistent_volume_claim.anyscale](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/persistent_volume_claim) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [cloud\_provider](#input\_cloud\_provider) | (Required) The cloud provider (aws or gcp)

ex:
cloud_provider = "aws"
| `string` | n/a | yes | +| [anyscale\_kubernetes\_namespace](#input\_anyscale\_kubernetes\_namespace) | (Optional) The name of the Kubernetes namespace.

ex:
anyscale_kubernetes_namespace = "anyscale-k8s"
| `string` | `"anyscale-k8s"` | no | +| [aws\_efs\_file\_system\_id](#input\_aws\_efs\_file\_system\_id) | (Optional) The ID of the EFS file system.

Required if `cloud_provider` is `aws`.

ex:
aws_efs_file_system_id = "fs-12345678"
| `string` | `null` | no | +| [gcp\_filestore\_ip](#input\_gcp\_filestore\_ip) | (Optional) The Filestore IP address.

Required if `cloud_provider` is `gcp`.

ex:
gcp_filestore_ip = "172.16.0.12"
| `string` | `null` | no | +| [gcp\_filestore\_share\_name](#input\_gcp\_filestore\_share\_name) | (Optional) The Filestore share name.

Required if `cloud_provider` is `gcp`.

ex:
gcp_filestore_share_name = "my-share"
| `string` | `null` | no | +| [kubernetes\_cluster\_name](#input\_kubernetes\_cluster\_name) | (Optional) The name of the Kubernetes cluster.

ex:
kubernetes_cluster_name = "my-cluster"
| `string` | `null` | no | +| [kubernetes\_persistent\_volume\_claim\_name](#input\_kubernetes\_persistent\_volume\_claim\_name) | (Optional) The name of the Kubernetes persistent volume claim.

ex:
kubernetes_persistent_volume_claim_name = "anyscale-nfs-claim"
| `string` | `"anyscale-nfs-claim"` | no | +| [kubernetes\_persistent\_volume\_name](#input\_kubernetes\_persistent\_volume\_name) | (Optional) The name of the Kubernetes persistent volume.

ex:
kubernetes_persistent_volume_name = "anyscale-nfs"
| `string` | `"anyscale-nfs"` | no | +| [kubernetes\_persistent\_volume\_size](#input\_kubernetes\_persistent\_volume\_size) | (Optional) The size of the Kubernetes persistent volume.

When using AWS EFS, this value is only a placeholder: EFS capacity is elastic, so the size is not enforced.

ex:
kubernetes_persistent_volume_size = "20Gi"
| `string` | `"20Gi"` | no | +| [module\_enabled](#input\_module\_enabled) | (Optional) Determines if this module should create resources.

When set to false, this module creates no resources.
ex:
module_enabled = true
| `bool` | `false` | no |
+
+## Outputs
+
+| Name | Description |
+|------|-------------|
+| [kubernetes\_persistent\_volume\_claim\_name](#output\_kubernetes\_persistent\_volume\_claim\_name) | The name of the Kubernetes persistent volume claim. |
+| [kubernetes\_persistent\_volume\_claim\_namespace](#output\_kubernetes\_persistent\_volume\_claim\_namespace) | The namespace of the Kubernetes persistent volume claim. |
+| [kubernetes\_persistent\_volume\_claim\_storageclassname](#output\_kubernetes\_persistent\_volume\_claim\_storageclassname) | The storage class name of the Kubernetes persistent volume claim. |
+| [kubernetes\_persistent\_volume\_claim\_volumename](#output\_kubernetes\_persistent\_volume\_claim\_volumename) | The volume name of the Kubernetes persistent volume claim. |
+| [kubernetes\_persistent\_volume\_name](#output\_kubernetes\_persistent\_volume\_name) | The name of the Kubernetes persistent volume. |
+
+
+
+[Terraform]: https://www.terraform.io
+[Issues]: https://github.com/anyscale/sa-sandbox-terraform/issues
+[badge-build]: https://github.com/anyscale/sa-sandbox-terraform/workflows/CI/CD%20Pipeline/badge.svg
+[badge-terraform]: https://img.shields.io/badge/terraform-1.x%20-623CE4.svg?logo=terraform
+[badge-tf-aws]: https://img.shields.io/badge/AWS-5.+-F8991D.svg?logo=terraform
+[build-status]: https://github.com/anyscale/sa-sandbox-terraform/actions
+[badge-opentofu]: https://img.shields.io/badge/opentofu-1.x%20-623CE4.svg?logo=terraform
+[badge-tf-google]: https://img.shields.io/badge/Google-5.+-F8991D.svg?logo=terraform
+[badge-tf-kubernetes]: https://img.shields.io/badge/KUBERNETES-2.+-F8991D.svg?logo=terraform
diff --git a/modules/anyscale-k8s-persistent-volume/main.tf b/modules/anyscale-k8s-persistent-volume/main.tf
new file mode 100644
index 0000000..f746764
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/main.tf
@@ -0,0 +1,45 @@
+locals {
+  module_enabled = var.module_enabled # gates both resources below through `count`
+}
+
+resource "kubernetes_persistent_volume" "anyscale" {
+  count = local.module_enabled ? 1 : 0
+  metadata {
+    name = var.kubernetes_persistent_volume_name
+  }
+
+  spec {
+    capacity = {
+      storage = var.kubernetes_persistent_volume_size
+    }
+
+    access_modes                     = ["ReadWriteMany"]
+    persistent_volume_reclaim_policy = "Retain"
+
+    storage_class_name = var.cloud_provider == "aws" ? "efs-sc" : "filestore-sc" # any value other than "aws" selects the Filestore settings
+    persistent_volume_source {
+      csi {
+        driver        = var.cloud_provider == "aws" ? "efs.csi.aws.com" : "filestore.csi.storage.gke.io"
+        volume_handle = var.cloud_provider == "aws" ? var.aws_efs_file_system_id : "${var.gcp_filestore_ip}/${var.gcp_filestore_share_name}" # NOTE(review): confirm the Filestore handle format against the CSI driver docs
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "anyscale" {
+  count = local.module_enabled ? 1 : 0
+  metadata {
+    name      = var.kubernetes_persistent_volume_claim_name
+    namespace = var.anyscale_kubernetes_namespace
+  }
+  spec {
+    access_modes = ["ReadWriteMany"]
+    resources {
+      requests = {
+        storage = var.kubernetes_persistent_volume_size
+      }
+    }
+    storage_class_name = var.cloud_provider == "aws" ? "efs-sc" : "filestore-sc"
+    volume_name        = kubernetes_persistent_volume.anyscale[0].metadata[0].name # pin the claim to the volume above; also orders PV before PVC
+  }
+}
diff --git a/modules/anyscale-k8s-persistent-volume/outputs.tf b/modules/anyscale-k8s-persistent-volume/outputs.tf
new file mode 100644
index 0000000..ffe73a0
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/outputs.tf
@@ -0,0 +1,24 @@
+output "kubernetes_persistent_volume_name" {
+  description = "The name of the Kubernetes persistent volume."
+  value       = try(kubernetes_persistent_volume.anyscale[0].metadata[0].name, "") # "" when the module is disabled (count = 0)
+}
+
+output "kubernetes_persistent_volume_claim_name" {
+  description = "The name of the Kubernetes persistent volume claim."
+  value       = try(kubernetes_persistent_volume_claim.anyscale[0].metadata[0].name, "")
+}
+
+output "kubernetes_persistent_volume_claim_namespace" {
+  description = "The namespace of the Kubernetes persistent volume claim."
+  value       = try(kubernetes_persistent_volume_claim.anyscale[0].metadata[0].namespace, "")
+}
+
+output "kubernetes_persistent_volume_claim_volumename" {
+  description = "The volume name of the Kubernetes persistent volume claim."
+  value       = try(kubernetes_persistent_volume_claim.anyscale[0].spec[0].volume_name, "")
+}
+
+output "kubernetes_persistent_volume_claim_storageclassname" {
+  description = "The storage class name of the Kubernetes persistent volume claim."
+  value       = try(kubernetes_persistent_volume_claim.anyscale[0].spec[0].storage_class_name, "")
+}
diff --git a/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/main.tf b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/main.tf
new file mode 100644
index 0000000..67d5007
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/main.tf
@@ -0,0 +1,208 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# CREATE Anyscale K8s Persistent Volume Resources
+# This template creates the EKS resources needed to test the persistent volume module
+# Requires:
+# - VPC
+# - Security Group
+# - IAM Roles
+# - EKS Cluster
+# ---------------------------------------------------------------------------------------------------------------------
+locals {
+  # azs = slice(data.aws_availability_zones.available.names, 0, 3)
+
+  full_tags = merge(tomap({
+    anyscale-cloud-id           = var.anyscale_cloud_id,
+    anyscale-deploy-environment = var.anyscale_deploy_env
+    }),
+    var.tags
+  )
+}
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Create resources for EKS TF Module
+# Creates a VPC
+# Creates a Security Group
+# Creates IAM Roles
+# ---------------------------------------------------------------------------------------------------------------------
+locals {
+  public_subnets  = ["172.24.101.0/24", "172.24.102.0/24", "172.24.103.0/24"]
+  private_subnets = ["172.24.20.0/24", "172.24.21.0/24", "172.24.22.0/24"]
+}
+module "eks_vpc" {
+  #checkov:skip=CKV_TF_1: Test code should use the latest version of the module
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-vpc"
+
+  anyscale_vpc_name = "tftest-k8s-persistentvol"
+  cidr_block        = "172.24.0.0/16"
+
+  public_subnets  = local.public_subnets
+  private_subnets = local.private_subnets
+}
+locals {
+  # Because subnet ID may not be known at plan time, we cannot use it as a key
+  anyscale_subnet_count = length(local.private_subnets)
+}
+
+module "eks_securitygroup" {
+  #checkov:skip=CKV_TF_1: Test code should use the latest version of the module
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-securitygroups"
+
+  vpc_id = module.eks_vpc.vpc_id
+
+  security_group_name_prefix = "tftest-k8s-persistentvol-"
+
+  ingress_with_self = [
+    { rule = "all-all" }
+  ]
+}
+
+module "eks_iam_roles" {
+  #checkov:skip=CKV_TF_1: Test code should use the latest version of the module
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-iam"
+
+  module_enabled                       = true
+  create_anyscale_access_role          = true
+  anyscale_access_role_name            = "tftest-k8s-persistentvol-controlplane-role"
+  create_cluster_node_instance_profile = false
+  create_iam_s3_policy                 = false
+
+  create_anyscale_eks_cluster_role = true
+  anyscale_eks_cluster_role_name   = "tftest-k8s-persistentvol-cluster-role"
+  create_anyscale_eks_node_role    = true
+  anyscale_eks_node_role_name      = "tftest-k8s-persistentvol-node-role"
+
+  anyscale_eks_cluster_oidc_arn = module.eks_cluster.eks_cluster_oidc_provider_arn
+  anyscale_eks_cluster_oidc_url = module.eks_cluster.eks_cluster_oidc_provider_url
+
+  create_eks_efs_csi_driver_role = true
+  eks_efs_csi_role_name          = "anyscale-eks-public-efs-csi-role" # NOTE(review): does not follow the tftest-k8s-persistentvol-* naming used above
+  efs_file_system_arn            = module.anyscale_efs.efs_arn
+
+  tags = local.full_tags
+}
+
+module "anyscale_efs" {
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-efs"
+
+  module_enabled = true
+
+  anyscale_efs_name             = "anyscale-eks-public-efs" # NOTE(review): does not follow the tftest-k8s-persistentvol-* naming used above
+  mount_targets_subnet_count    = local.anyscale_subnet_count
+  mount_targets_subnets         = module.eks_vpc.private_subnet_ids
+  associated_security_group_ids = [module.eks_securitygroup.security_group_id]
+
+  tags = local.full_tags
+}
+
+locals {
+  coredns_config = jsonencode({ # NOTE(review): not referenced anywhere else in this test configuration
+    affinity = {
+      nodeAffinity = {
+        requiredDuringSchedulingIgnoredDuringExecution = {
+          nodeSelectorTerms = [
+            {
+              matchExpressions = [
+                {
+                  key      = "node-type"
+                  operator = "In"
+                  values   = ["management"]
+                }
+              ]
+            }
+          ]
+        }
+      }
+    },
+    nodeSelector = {
+      "node-type" = "management"
+    },
+    tolerations = [
+      {
+        key      = "CriticalAddonsOnly"
+        operator = "Exists"
+      },
+      {
+        effect = "NoSchedule"
+        key    = "node-role.kubernetes.io/control-plane"
+      }
+    ],
+    replicaCount = 2
+  })
+}
+
+module "eks_cluster" {
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-cluster"
+
+  module_enabled = true
+
+  anyscale_subnet_ids        = module.eks_vpc.public_subnet_ids
+  anyscale_subnet_count      = local.anyscale_subnet_count
+  anyscale_security_group_id = module.eks_securitygroup.security_group_id
+  eks_role_arn               = module.eks_iam_roles.iam_anyscale_eks_cluster_role_arn
+  anyscale_eks_name          = "tftest-k8s-persistentvol"
+
+  tags = local.full_tags
+
+  eks_addons = [
+    # Add EFS mount
+    {
+      addon_name               = "aws-efs-csi-driver"
+      addon_version            = "v2.0.7-eksbuild.1"
+      service_account_role_arn = module.eks_iam_roles.iam_anyscale_eks_efs_csi_driver_role_arn
+    }
+  ]
+  eks_addons_depends_on = module.anyscale_eks_nodegroups
+
+  depends_on = [module.eks_vpc, module.eks_securitygroup]
+}
+
+module "anyscale_eks_nodegroups" {
+  source = "../../../../../terraform-aws-anyscale-cloudfoundation-modules/modules/aws-anyscale-eks-nodegroups"
+
+  module_enabled = true
+
+  eks_node_role_arn = module.eks_iam_roles.iam_anyscale_eks_node_role_arn
+  eks_cluster_name  = module.eks_cluster.eks_cluster_name
+  subnet_ids        = module.eks_vpc.private_subnet_ids
+
+  tags = local.full_tags
+}
+
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Create Resources with no optional parameters
+# ---------------------------------------------------------------------------------------------------------------------
+module "all_defaults" {
+  source = "../../"
+
+  module_enabled         = true
+  cloud_provider         = "aws"
+  aws_efs_file_system_id = module.anyscale_efs.efs_id # required for aws: becomes the CSI volume_handle. NOTE(review): confirm the EFS module exports `efs_id`
+
+  depends_on = [module.eks_cluster]
+
+}
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Create Resources with as many optional parameters as possible
+# ---------------------------------------------------------------------------------------------------------------------
+# module "kitchen_sink" {
+#   source = "../../"
+
+#   module_enabled = true
+#   cloud_provider = "aws"
+
+#   anyscale_kubernetes_namespace = "tftest-k8s-persistentvol"
+#   depends_on = [module.eks_cluster]
+
+# }
+
+# ---------------------------------------------------------------------------------------------------------------------
+# Do not create any resources
+# ---------------------------------------------------------------------------------------------------------------------
+module "test_no_resources" {
+  source = "../.."
+
+  module_enabled = false
+  cloud_provider = "aws"
+}
diff --git a/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/outputs.tf b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/outputs.tf
new file mode 100644
index 0000000..9b9ba14
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/outputs.tf
@@ -0,0 +1,23 @@
+# --------------
+# Defaults Test
+# --------------
+output "all_defaults_resources" {
+  description = "The resources of the All Defaults test"
+  value       = module.all_defaults # exposes every output of the module instance
+}
+
+# ------------------
+# Kitchen Sink Test (disabled: module.kitchen_sink is commented out in main.tf)
+# ------------------
+# output "kitchen_sink_resources" {
+#   description = "The resources of the Kitchen Sink test"
+#   value       = module.kitchen_sink
+# }
+
+# -----------------
+# No resource test
+# -----------------
+output "test_no_resources" {
+  description = "The outputs of the no_resource resource - should all be empty"
+  value       = module.test_no_resources # instance declared with module_enabled = false
+}
diff --git a/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/variables.tf b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/variables.tf
new file mode 100644
index 0000000..9393991
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/variables.tf
@@ -0,0 +1,58 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# ENVIRONMENT VARIABLES
+# Define these secrets as environment variables
+# ---------------------------------------------------------------------------------------------------------------------
+
+# AWS_ACCESS_KEY_ID
+# AWS_SECRET_ACCESS_KEY
+
+# ---------------------------------------------------------------------------------------------------------------------
+# REQUIRED VARIABLES
+# These variables ship with test defaults here; override them as needed.
+# ---------------------------------------------------------------------------------------------------------------------
+
+variable "aws_region" {
+  description = "The AWS region in which all resources will be created."
+  type        = string
+  default     = "us-east-2"
+}
+
+variable "anyscale_cloud_id" {
+  description = "(Optional) Anyscale Cloud ID. Default is `null`."
+  type        = string
+  default     = null
+  validation { # null is accepted; otherwise the value must be longer than 4 characters and start with "cld_"
+    condition = (
+      var.anyscale_cloud_id == null ? true : (
+        length(var.anyscale_cloud_id) > 4 &&
+        substr(var.anyscale_cloud_id, 0, 4) == "cld_"
+      )
+    )
+    error_message = "The anyscale_cloud_id value must start with \"cld_\"."
+  }
+}
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+variable "anyscale_deploy_env" {
+  description = "(Optional) Anyscale deploy environment. Used in resource names and tags."
+  type        = string
+  default     = "production"
+  validation { # accepts exactly the three environments compared below
+    condition = (
+      var.anyscale_deploy_env == "production" || var.anyscale_deploy_env == "development" || var.anyscale_deploy_env == "test"
+    )
+    error_message = "The anyscale_deploy_env only allows `production`, `test`, or `development`." # must end with a period for Terraform 1.0/1.1
+  }
+}
+
+variable "tags" {
+  description = "(Optional) A map of tags to all resources that accept tags."
+  type        = map(string)
+  default = {
+    "test" : true, # converted to the string "true" by the map(string) type constraint
+    "environment" : "test"
+  }
+}
diff --git a/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/versions.tf b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/versions.tf
new file mode 100644
index 0000000..c46128a
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/test/anyscale-aws-test/versions.tf
@@ -0,0 +1,31 @@
+terraform {
+  required_version = ">= 1.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
+
+
+
+provider "kubernetes" { # connects to the EKS cluster created by module.eks_cluster
+  host                   = module.eks_cluster.eks_kubeconfig.endpoint
+  cluster_ca_certificate = base64decode(module.eks_cluster.eks_kubeconfig.cluster_ca_certificate)
+
+  exec { # obtains a token by running `aws eks get-token` for the cluster
+    api_version = "client.authentication.k8s.io/v1beta1"
+    args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.eks_cluster_name]
+    command     = "aws"
+  }
+}
+
+provider "aws" {
+  region = var.aws_region
+}
diff --git a/modules/anyscale-k8s-persistent-volume/variables.tf b/modules/anyscale-k8s-persistent-volume/variables.tf
new file mode 100644
index 0000000..7644b3f
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/variables.tf
@@ -0,0 +1,151 @@
+# ------------------------------------------------------------------------------
+# REQUIRED PARAMETERS
+# These variables must be set when using this module.
+# ------------------------------------------------------------------------------
+variable "cloud_provider" {
+  description = <<-EOT
+    (Required) The cloud provider (aws or gcp)
+
+    ex:
+    ```
+    cloud_provider = "aws"
+    ```
+  EOT
+  type        = string
+  validation { # only `aws` and `gcp` pass
+    condition = (
+      var.cloud_provider == "aws" || var.cloud_provider == "gcp"
+    )
+    error_message = "The cloud_provider only allows `aws` or `gcp`." # must end with a period for Terraform 1.0/1.1
+  }
+}
+
+variable "kubernetes_cluster_name" { # optional (defaults to null) even though it sits in the REQUIRED section
+  type        = string
+  description = <<-EOT
+    (Optional) The name of the Kubernetes cluster.
+
+    ex:
+    ```
+    kubernetes_cluster_name = "my-cluster"
+    ```
+  EOT
+  default     = null
+}
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+variable "module_enabled" {
+  description = <<-EOT
+    (Optional) Determines if this module should create resources.
+
+    When set to false, this module creates no resources.
+    ex:
+    ```
+    module_enabled = true
+    ```
+  EOT
+  type        = bool
+  default     = false
+}
+
+variable "kubernetes_persistent_volume_name" {
+  description = <<-EOT
+    (Optional) The name of the Kubernetes persistent volume.
+
+    ex:
+    ```
+    kubernetes_persistent_volume_name = "anyscale-nfs"
+    ```
+  EOT
+  type        = string
+  default     = "anyscale-nfs"
+}
+
+variable "kubernetes_persistent_volume_size" {
+  description = <<-EOT
+    (Optional) The size of the Kubernetes persistent volume.
+
+    When using AWS EFS, this value is only a placeholder: EFS capacity is elastic, so the size is not enforced.
+
+    ex:
+    ```
+    kubernetes_persistent_volume_size = "20Gi"
+    ```
+  EOT
+  type        = string
+  default     = "20Gi"
+}
+
+variable "kubernetes_persistent_volume_claim_name" {
+  description = <<-EOT
+    (Optional) The name of the Kubernetes persistent volume claim.
+
+    ex:
+    ```
+    kubernetes_persistent_volume_claim_name = "anyscale-nfs-claim"
+    ```
+  EOT
+  type        = string
+  default     = "anyscale-nfs-claim"
+}
+
+variable "anyscale_kubernetes_namespace" {
+  description = <<-EOT
+    (Optional) The name of the Kubernetes namespace.
+
+    ex:
+    ```
+    anyscale_kubernetes_namespace = "anyscale-k8s"
+    ```
+  EOT
+  type        = string
+  default     = "anyscale-k8s"
+}
+
+variable "aws_efs_file_system_id" {
+  description = <<-EOT
+    (Optional) The ID of the EFS file system.
+
+    Required if `cloud_provider` is `aws`.
+
+    ex:
+    ```
+    aws_efs_file_system_id = "fs-12345678"
+    ```
+  EOT
+  type        = string
+  default     = null
+}
+
+variable "gcp_filestore_ip" {
+  description = <<-EOT
+    (Optional) The Filestore IP address.
+
+    Required if `cloud_provider` is `gcp`.
+
+    ex:
+    ```
+    gcp_filestore_ip = "172.16.0.12"
+    ```
+  EOT
+  type        = string
+  default     = null
+}
+
+variable "gcp_filestore_share_name" {
+  description = <<-EOT
+    (Optional) The Filestore share name.
+
+    Required if `cloud_provider` is `gcp`.
+
+    ex:
+    ```
+    gcp_filestore_share_name = "my-share"
+    ```
+  EOT
+  type        = string
+  default     = null
+}
diff --git a/modules/anyscale-k8s-persistent-volume/versions.tf b/modules/anyscale-k8s-persistent-volume/versions.tf
new file mode 100644
index 0000000..94019cd
--- /dev/null
+++ b/modules/anyscale-k8s-persistent-volume/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
diff --git a/outputs.tf b/outputs.tf
new file mode 100644
index 0000000..e69de29
diff --git a/variables.tf b/variables.tf
new file mode 100644
index 0000000..d703e72
--- /dev/null
+++ b/variables.tf
@@ -0,0 +1,71 @@
+# ------------------------------------------------------------------------------
+# REQUIRED PARAMETERS
+# These variables must be set when using this module.
+# ------------------------------------------------------------------------------
+variable "cloud_provider" {
+  description = <<-EOT
+    (Required) The cloud provider (aws or gcp)
+
+    ex:
+    ```
+    cloud_provider = "aws"
+    ```
+  EOT
+  type        = string
+  validation { # only `aws` and `gcp` pass
+    condition = (
+      var.cloud_provider == "aws" || var.cloud_provider == "gcp"
+    )
+    error_message = "The cloud_provider only allows `aws` or `gcp`." # must end with a period for Terraform 1.0/1.1
+  }
+}
+
+variable "kubernetes_cluster_name" { # optional (defaults to null) even though it sits in the REQUIRED section
+  type        = string
+  description = <<-EOT
+    (Optional) The name of the Kubernetes cluster.
+
+    ex:
+    ```
+    kubernetes_cluster_name = "my-cluster"
+    ```
+  EOT
+  default     = null
+}
+
+# ------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These variables have defaults, but may be overridden.
+# ------------------------------------------------------------------------------
+
+# ------------------
+# AWS Related
+# ------------------
+variable "aws_dataplane_role_arn" {
+  description = <<-EOT
+    (Optional) The ARN of the AWS IAM role that will be used by the EKS cluster to access AWS services.
+
+    Required if `cloud_provider` is set to `aws`.
+
+    ex:
+    ```
+    aws_dataplane_role_arn = "arn:aws:iam::123456789012:role/my-eks-dataplane-role"
+    ```
+  EOT
+  type        = string
+  default     = null # not validated here even though the description calls it required for aws
+}
+variable "aws_controlplane_role_arn" { # NOTE(review): description text is identical to aws_dataplane_role_arn - confirm what distinguishes the two roles
+  description = <<-EOT
+    (Optional) The ARN of the AWS IAM role that will be used by the EKS cluster to access AWS services.
+
+    Required if `cloud_provider` is set to `aws`.
+
+    ex:
+    ```
+    aws_controlplane_role_arn = "arn:aws:iam::123456789012:role/my-eks-controlplane-role"
+    ```
+  EOT
+  type        = string
+  default     = null # not validated here even though the description calls it required for aws
+}
diff --git a/versions.tf b/versions.tf
new file mode 100644
index 0000000..8a43595
--- /dev/null
+++ b/versions.tf
@@ -0,0 +1,20 @@
+terraform {
+  required_version = ">= 1.0" # minimum Terraform CLI version accepted by the root module
+
+  required_providers {
+    helm = {
+      source  = "hashicorp/helm"
+      version = "~> 2.0"
+    }
+
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+
+    time = {
+      source  = "hashicorp/time"
+      version = ">= 0.12" # lower bound only, unlike the `~>` constraints above
+    }
+  }
+}