diff --git a/README.md b/README.md index 80fe0e47..a40774fd 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@ your custom applications. You also can monitor your Amazon Managed Service for Prometheus workspaces ingestion, costs, active series with [this module](./modules/managed-prometheus-monitoring). -image +![image](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/e83f8709-f754-4192-90f2-e3de96d2e26c) + ## Documentation @@ -33,15 +34,6 @@ visit the [Amazon EKS cluster monitoring documentation](https://aws-observabilit The sections below demonstrate how you can leverage AWS Observability Accelerator to enable monitoring to an existing EKS cluster. -### v2.x changes - -v2+ releases introduces couple of breaking changes compared to previous versions: - -- `modules/workloads/infra` module moves to `modules/eks-monitoring` -- All EKS configuration options moves from the base module to the `eks-monitoring` module -- All EKS workload modules `modules/workloads/{java,nginx}` merge into `eks-monitoring` as configuration options (patterns), see [examples](./examples) to provide a more complete visibility -- All examples have been updated to reflect these changes -- Introducing GitOps for Grafana contents (Dashboards, Folders and Data sources) with [Grafana Operator](https://github.com/grafana-operator/grafana-operator) and [Flux CD](https://fluxcd.io/) ### Base Module @@ -161,14 +153,13 @@ If you are interested in contributing, see the | [terraform](#requirement\_terraform) | >= 1.1.0 | | [aws](#requirement\_aws) | >= 4.0.0 | | [awscc](#requirement\_awscc) | >= 0.24.0 | -| [grafana](#requirement\_grafana) | 1.25.0 | +| [grafana](#requirement\_grafana) | >= 1.25.0 | ## Providers | Name | Version | |------|---------| | [aws](#provider\_aws) | >= 4.0.0 | -| [grafana](#provider\_grafana) | 1.25.0 | ## Modules @@ -180,8 +171,6 @@ No modules. |------|------| | [aws_prometheus_alert_manager_definition.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_alert_manager_definition) | resource | | [aws_prometheus_workspace.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_workspace) | resource | -| [grafana_data_source.amp](https://registry.terraform.io/providers/grafana/grafana/1.25.0/docs/resources/data_source) | resource | -| [grafana_folder.this](https://registry.terraform.io/providers/grafana/grafana/1.25.0/docs/resources/folder) | resource | | [aws_grafana_workspace.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/grafana_workspace) | data source | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | @@ -190,12 +179,10 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [aws\_region](#input\_aws\_region) | AWS Region | `string` | n/a | yes | -| [create\_dashboard\_folder](#input\_create\_dashboard\_folder) | Boolean flag to enable Amazon Managed Grafana folder and dashboards | `bool` | `true` | no | -| [create\_prometheus\_data\_source](#input\_create\_prometheus\_data\_source) | Boolean flag to enable Amazon Managed Grafana datasource | `bool` | `true` | no | | [enable\_alertmanager](#input\_enable\_alertmanager) | Creates Amazon Managed Service for Prometheus AlertManager for all workloads | `bool` | `false` | no | | [enable\_managed\_prometheus](#input\_enable\_managed\_prometheus) | Creates a new Amazon Managed Service for Prometheus Workspace | `bool` | `true` | no | | [grafana\_api\_key](#input\_grafana\_api\_key) | Grafana API key for the Amazon Managed Grafana workspace | `string` | n/a | yes | -| [managed\_grafana\_workspace\_id](#input\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana Workspace ID | `string` | `""` | no | +| [managed\_grafana\_workspace\_id](#input\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana Workspace ID | `string` | n/a | yes | | [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Service for Prometheus Workspace ID | `string` | `""` | no | | [managed\_prometheus\_workspace\_region](#input\_managed\_prometheus\_workspace\_region) | Region where Amazon Managed Service for Prometheus is deployed | `string` | `null` | no | | [tags](#input\_tags) | Additional tags (e.g. `map('BusinessUnit`,`XYZ`) | `map(string)` | `{}` | no | @@ -205,15 +192,10 @@ No modules. | Name | Description | |------|-------------| | [aws\_region](#output\_aws\_region) | AWS Region | -| [grafana\_dashboard\_folder\_created](#output\_grafana\_dashboard\_folder\_created) | Boolean value indicating if the module created a dashboard folder in Amazon Managed Grafana | -| [grafana\_dashboards\_folder\_id](#output\_grafana\_dashboards\_folder\_id) | Grafana folder ID for automatic dashboards. Required by workload modules | -| [grafana\_prometheus\_datasource\_test](#output\_grafana\_prometheus\_datasource\_test) | Grafana save & test URL for Amazon Managed Prometheus workspace | | [managed\_grafana\_workspace\_endpoint](#output\_managed\_grafana\_workspace\_endpoint) | Amazon Managed Grafana workspace endpoint | -| [managed\_grafana\_workspace\_id](#output\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana workspace ID | | [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint | | [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID | | [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus workspace region | -| [prometheus\_data\_source\_created](#output\_prometheus\_data\_source\_created) | Boolean value indicating if the module created a prometheus data source in Amazon Managed Grafana | ## Contributing diff --git a/docs/concepts.md b/docs/concepts.md index d71ca655..4c659f8e 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -39,22 +39,24 @@ The grafana-operator is a Kubernetes operator built to help you manage your Graf GitOps is a way of managing application and infrastructure deployment so that the whole system is described declaratively in a Git repository. It is an operational model that offers you the ability to manage the state of multiple Kubernetes clusters leveraging the best practices of version control, immutable artifacts, and automation. Flux is a declarative, GitOps-based continuous delivery tool that can be integrated into any CI/CD pipeline. It gives users the flexibility of choosing their Git provider (GitHub, GitLab, BitBucket). Now, with grafana-operator supporting the management of external Grafana instances such as Amazon Managed Grafana, operations personas can use GitOps mechanisms using CNCF projects such as Flux to create and manage the lifecycle of resources in Amazon Managed Grafana. -We have setup a [GitRepository](https://fluxcd.io/flux/components/source/gitrepositories/) and [Kustomization](https://fluxcd.io/flux/components/kustomize/kustomization/) using flux to sync our GitHub Repository to add Grafana Datasources, folder and Dashboards to Amazon Managed Grafana using Grafana Operator. GitRepository defines a Source to produce an Artifact for a Git repository revision. Kustomization defines a pipeline for fetching, decrypting, building, validating and applying Kustomize overlays or plain Kubernetes manifests. we are also using [Flux Post build variable substitution](https://fluxcd.io/flux/components/kustomize/kustomization/#post-build-variable-substitution) to dynamically render variables such as AMG_AWS_REGION, AMP_ENDPOINT_URL, AMG_ENDPOINT_URL,GRAFANA_NODEEXP_DASH_URL on the YAML manifests during deployment time to avoid hardcoding on the YAML manifests stored in Git repo. +We have setup a [GitRepository](https://fluxcd.io/flux/components/source/gitrepositories/) and [Kustomization](https://fluxcd.io/flux/components/kustomize/kustomization/) using Flux to sync our GitHub Repository to add Grafana Datasources, folder and Dashboards to Amazon Managed Grafana using Grafana Operator. GitRepository defines a Source to produce an Artifact for a Git repository revision. Kustomization defines a pipeline for fetching, decrypting, building, validating and applying Kustomize overlays or plain Kubernetes manifests. we are also using [Flux Post build variable substitution](https://fluxcd.io/flux/components/kustomize/kustomization/#post-build-variable-substitution) to dynamically render variables such as AMG_AWS_REGION, AMP_ENDPOINT_URL, AMG_ENDPOINT_URL,GRAFANA_NODEEXP_DASH_URL on the YAML manifests during deployment time to avoid hardcoding on the YAML manifests stored in Git repo. -We have placed our declarative code snippet to create an Amazon Managed Service For Promethes datasource and Grafana Dashboard in Amazon Managed Grafana in our [AWS Observabiity Accelerator GitHub Repository](https://github.com/aws-observability/aws-observability-accelerator/tree/main/artifacts/grafana-operator-manifests). We have setup a GitRepository to point to the AWS Observabiity Accelerator GitHub Repository and `Kustomization` for flux to sync Git Repository with artifacts in `./artifacts/grafana-operator-manifests` path in the AWS Observabiity Accelerator GitHub Repository. You can use this extension of our solution to point your own Kubernetes manifests to create Grafana Datasources and personified Grafana Dashboards of your choice using GitOps with Grafana Operator and Flux in Kubernetes native way with altering and redeploying this solution for changes to Grafana resources. +We have placed our declarative code snippet to create an Amazon Managed Service For Promethes datasource and Grafana Dashboard in Amazon Managed Grafana in our [AWS Observabiity Accelerator GitHub Repository](https://github.com/aws-observability/aws-observability-accelerator). We have setup a GitRepository to point to the AWS Observabiity Accelerator GitHub Repository and `Kustomization` for flux to sync Git Repository with artifacts in `./artifacts/grafana-operator-manifests/*` path in the AWS Observabiity Accelerator GitHub Repository. You can use this extension of our solution to point your own Kubernetes manifests to create Grafana Datasources and personified Grafana Dashboards of your choice using GitOps with Grafana Operator and Flux in Kubernetes native way with altering and redeploying this solution for changes to Grafana resources. -## v2.x changes +## Release notes -v2.x [releases](https://github.com/aws-observability/terraform-aws-observability-accelerator/releases) introduce -couple of breaking changes compared to previous versions: +We encourage you to use our [release versions](https://github.com/aws-observability/terraform-aws-observability-accelerator/releases) +as much as possible to avoid breaking changes when deploying Terraform modules. You can +read also our change log on the releases page. Here's an example of using a fixed version: + +```hcl +module "eks_monitoring" { + source = "github.com/aws-observability/terraform-aws-observability-accelerator//modules/managed-prometheus-monitoring?ref=v2.5.0" +} +``` -- `modules/workloads/infra` module moves to `modules/eks-monitoring` -- EKS configuration options moves from the base module to the `eks-monitoring` module -- EKS workload modules **java,nginx** merge into `eks-monitoring` as configuration options (patterns), -see [examples](https://github.com/aws-observability/terraform-aws-observability-accelerator/tree/main/examples) -- Examples have been updated to reflect these changes ## Base module @@ -138,4 +140,4 @@ classDiagram If you are new to AWS Observability services, or want to dive deeper into them, check our [One Observability Workshop](https://catalog.workshops.aws/observability/) -for a hands-on experience in a self-paced environement or at an AWS venue. +for a hands-on experience in a self-paced environment or at an AWS venue. diff --git a/docs/eks/index.md b/docs/eks/index.md index 95faaa96..81def632 100644 --- a/docs/eks/index.md +++ b/docs/eks/index.md @@ -111,25 +111,40 @@ terraform apply ## Visualization -#### 1. Prometheus data source on Grafana -Make sure to open the link in the output. After a successful deployment, this will open -the Prometheus data source configuration on Grafana. -Click `Save & test` and you should see a notification confirming that the Amazon Managed Service for Prometheus workspace is ready to be used on Grafana. +#### 1. Grafana dashboards -```bash -terraform output grafana_prometheus_datasource_test -``` +Login to your Grafana workspace and navigate to the Dashboards panel. You should see a list of dashboards under the `Observability Accelerator Dashboards` +image -#### 2. Grafana dashboards +Open a specific dashboard and you should be able to view its visualization +cluster headlines -Go to the Dashboards panel of your Grafana workspace. You should see a list of dashboards under the `Observability Accelerator Dashboards` +With v2.5 and above, the dashboards are managed with a Grafana Operator running in your cluster. +From the cluster to view all dashboards as Kubernetes objects, run -image +```console +kubectl get grafanadashboards -A +NAMESPACE NAME AGE +grafana-operator cluster-grafanadashboard 138m +grafana-operator java-grafanadashboard 143m +grafana-operator kubelet-grafanadashboard 13h +grafana-operator namespace-workloads-grafanadashboard 13h +grafana-operator nginx-grafanadashboard 134m +grafana-operator node-exporter-grafanadashboard 13h +grafana-operator nodes-grafanadashboard 13h +grafana-operator workloads-grafanadashboard 13h +``` -Open a specific dashboard and you should be able to view its visualization +You can inspect more details per dashboard using this command + +```console +kubectl describe grafanadashboards cluster-grafanadashboard -n grafana-operator +``` + +Grafana Operator and Flux always work together to synchronize your dashboards with Git. +If you delete your dashboards by accident, they will be re-provisioned automatically. -cluster headlines #### 3. Amazon Managed Service for Prometheus rules and alerts @@ -216,19 +231,20 @@ export GO_AMG_API_KEY=$(aws grafana create-workspace-api-key \ --output text) ``` -- Next, lets grab the Grafana API key secret name from AWS Secrets Manager. The keyname should start with `terraform-..` +- Finally, update the Grafana API key secret in AWS Secrets Manager using the above new Grafana API key: ```bash -aws secretsmanager list-secrets +aws aws ssm put-parameter \ + --name "/terraform-accelerator/grafana-api-key" \ + --type "SecureString" \ + --value "{\"GF_SECURITY_ADMIN_APIKEY\": \"${GO_AMG_API_KEY}\"}" \ + --region ``` -- Finally, update the Grafana API key secret in AWS Secrets Manager using the above new Grafana API key: +- If the issue persists, you can force the synchronization by deleting the `externalsecret` Kubernetes object. ```bash -aws secretsmanager update-secret \ - --secret-id \ - --secret-string "{\"GF_SECURITY_ADMIN_APIKEY\": \"${GO_AMG_API_KEY}\"}" \ - --region +kubectl delete externalsecret/external-secrets-sm -n grafana-operator ``` ### 2. Upgrade from 2.1.0 or earlier diff --git a/docs/eks/java.md b/docs/eks/java.md index c699d037..58b4a13b 100644 --- a/docs/eks/java.md +++ b/docs/eks/java.md @@ -32,7 +32,7 @@ Make sure to refresh your temporary Grafana API key ```bash export TF_VAR_managed_grafana_workspace_id=g-xxx -export TF_VAR_grafana_api_key=`aws grafana create-workspace-api-key --key-name "observability-accelerator-$(date +%s)" --key-role ADMIN --seconds-to-live 1200 --workspace-id $TF_VAR_managed_grafana_workspace_id --query key --output text` +export TF_VAR_grafana_api_key=`aws grafana create-workspace-api-key --key-name "observability-accelerator-$(date +%s)" --key-role ADMIN --seconds-to-live 7200 --workspace-id $TF_VAR_managed_grafana_workspace_id --query key --output text` ``` ## Deploy diff --git a/docs/eks/multicluster.md b/docs/eks/multicluster.md index 51c3f14e..560d99e4 100644 --- a/docs/eks/multicluster.md +++ b/docs/eks/multicluster.md @@ -11,7 +11,7 @@ Using the example [eks-cluster-with-vpc](https://aws-observability.github.io/ter 1. `eks-cluster-1` 2. `eks-cluster-2` -#### 2. Amazon Managed Serivce for Prometheus (AMP) workspace +#### 2. Amazon Managed Service for Prometheus (AMP) workspace We recommend that you create a new AMP workspace. To do that you can run the following command. @@ -48,7 +48,7 @@ Ensure you have the following necessary IAM permissions * `grafana.DeleteWorkspaceApiKey` ```sh -export TF_VAR_grafana_api_key=`aws grafana create-workspace-api-key --key-name "observability-accelerator-$(date +%s)" --key-role ADMIN --seconds-to-live 1200 --workspace-id $TF_VAR_managed_grafana_workspace_id --query key --output text` +export TF_VAR_grafana_api_key=`aws grafana create-workspace-api-key --key-name "observability-accelerator-$(date +%s)" --key-role ADMIN --seconds-to-live 7200 --workspace-id $TF_VAR_managed_grafana_workspace_id --query key --output text` ``` ## Setup @@ -70,8 +70,8 @@ Verify by looking at the file `variables.tf` that there are two EKS clusters tar The difference in deployment between these clusters is that Terraform, when setting up the EKS cluster behind variable `eks_cluster_1_id` for observability, also sets up: -* Dashboard folder and files in `AMG` -* Prometheus and Java, alerting and recording rules in `AMP` +* Dashboard folder and files in Amazon Managed Grafana +* Prometheus and Java, alerting and recording rules in Amazon Managed Service for Prometheus !!! warning To override the defaults, create a `terraform.tfvars` and change the default values of the variables. diff --git a/docs/index.md b/docs/index.md index 79d71cf8..4407a022 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,7 +14,7 @@ your custom applications. You also can monitor your Amazon Managed Service for Prometheus workspaces ingestion, costs, active series with [this module](https://aws-observability.github.io/terraform-aws-observability-accelerator/workloads/managed-prometheus/). -image +![image](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/e83f8709-f754-4192-90f2-e3de96d2e26c) ## Getting started @@ -26,7 +26,7 @@ traces collection, dashboards and alerts for monitoring: - Java/JMX workloads (running on Amazon EKS) - Amazon Managed Service for Prometheus workspaces with Amazon CloudWatch - [Grafana Operator](https://github.com/grafana-operator/grafana-operator) and [Flux CD](https://fluxcd.io/) to manage Grafana contents (AWS data sources, Grafana Dashboards) with GitOps -- External Secrets Operator to retrieve and Sync the Grafana API keys +- External Secrets Operator to retrieve and sync the Grafana API keys These modules can be directly configured in your existing Terraform configurations or ready to be deployed in our packaged diff --git a/examples/eks-multicluster/main.tf b/examples/eks-multicluster/main.tf index 3991d591..fba7cf81 100644 --- a/examples/eks-multicluster/main.tf +++ b/examples/eks-multicluster/main.tf @@ -3,8 +3,6 @@ module "aws_observability_accelerator" { aws_region = var.eks_cluster_1_region enable_managed_prometheus = false enable_alertmanager = true - create_dashboard_folder = true - create_prometheus_data_source = true grafana_api_key = var.grafana_api_key managed_prometheus_workspace_region = null managed_prometheus_workspace_id = var.managed_prometheus_workspace_id @@ -24,23 +22,18 @@ module "eks_cluster_1_monitoring" { # This configuration section results in actions performed on AMG and AMP; and it needs to be done just once # And hence, this in performed in conjunction with the setup of the eks_cluster_1 EKS cluster - enable_dashboards = true - enable_alerting_rules = true - enable_recording_rules = true + enable_dashboards = true + enable_external_secrets = true + enable_fluxcd = true + enable_alerting_rules = true + enable_recording_rules = true grafana_api_key = var.grafana_api_key - dashboards_folder_id = module.aws_observability_accelerator.grafana_dashboards_folder_id managed_prometheus_workspace_id = module.aws_observability_accelerator.managed_prometheus_workspace_id managed_prometheus_workspace_endpoint = module.aws_observability_accelerator.managed_prometheus_workspace_endpoint managed_prometheus_workspace_region = module.aws_observability_accelerator.managed_prometheus_workspace_region grafana_url = module.aws_observability_accelerator.managed_grafana_workspace_endpoint - java_config = { - enable_alerting_rules = true - enable_recording_rules = true - scrape_sample_limit = 1 - } - prometheus_config = { global_scrape_interval = "60s" global_scrape_timeout = "15s" @@ -68,23 +61,17 @@ module "eks_cluster_2_monitoring" { # Since the following were enabled in conjunction with the set up of the # eks_cluster_1 EKS cluster, we will skip them with the eks_cluster_2 EKS cluster - enable_dashboards = false - enable_alerting_rules = false - enable_recording_rules = false + enable_dashboards = false + enable_external_secrets = false + enable_fluxcd = false + enable_alerting_rules = false + enable_recording_rules = false - grafana_api_key = var.grafana_api_key - dashboards_folder_id = module.aws_observability_accelerator.grafana_dashboards_folder_id managed_prometheus_workspace_id = module.aws_observability_accelerator.managed_prometheus_workspace_id managed_prometheus_workspace_endpoint = module.aws_observability_accelerator.managed_prometheus_workspace_endpoint managed_prometheus_workspace_region = module.aws_observability_accelerator.managed_prometheus_workspace_region grafana_url = module.aws_observability_accelerator.managed_grafana_workspace_endpoint - java_config = { - enable_alerting_rules = false # addressed while setting up the eks_cluster_1 EKS cluster - enable_recording_rules = false # addressed while setting up the eks_cluster_1 EKS cluster - scrape_sample_limit = 1 - } - prometheus_config = { global_scrape_interval = "60s" global_scrape_timeout = "15s" diff --git a/examples/existing-cluster-java/README.md b/examples/existing-cluster-java/README.md index 1b066929..f96fc551 100644 --- a/examples/existing-cluster-java/README.md +++ b/examples/existing-cluster-java/README.md @@ -236,8 +236,6 @@ terraform destroy -var-file=terraform.tfvars | [aws\_region](#output\_aws\_region) | AWS Region | | [eks\_cluster\_id](#output\_eks\_cluster\_id) | EKS Cluster Id | | [eks\_cluster\_version](#output\_eks\_cluster\_version) | EKS Cluster version | -| [grafana\_dashboard\_urls](#output\_grafana\_dashboard\_urls) | URLs for dashboards created | -| [grafana\_prometheus\_datasource\_test](#output\_grafana\_prometheus\_datasource\_test) | Grafana save & test URL for Amazon Managed Prometheus workspace | | [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint | | [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID | diff --git a/examples/existing-cluster-java/main.tf b/examples/existing-cluster-java/main.tf index 4802f8f5..cf28d4a7 100644 --- a/examples/existing-cluster-java/main.tf +++ b/examples/existing-cluster-java/main.tf @@ -79,10 +79,8 @@ module "eks_monitoring" { eks_cluster_id = var.eks_cluster_id # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' - # the intention to publish is overruled depending upon whether grafana dashboard folder is created by the observability accelerator - enable_dashboards = module.aws_observability_accelerator.grafana_dashboard_folder_created ? var.enable_dashboards : false + enable_dashboards = var.enable_dashboards - dashboards_folder_id = module.aws_observability_accelerator.grafana_dashboards_folder_id managed_prometheus_workspace_id = module.aws_observability_accelerator.managed_prometheus_workspace_id managed_prometheus_workspace_endpoint = module.aws_observability_accelerator.managed_prometheus_workspace_endpoint diff --git a/examples/existing-cluster-java/outputs.tf b/examples/existing-cluster-java/outputs.tf index 586cffdc..ad1c3405 100644 --- a/examples/existing-cluster-java/outputs.tf +++ b/examples/existing-cluster-java/outputs.tf @@ -13,16 +13,6 @@ output "managed_prometheus_workspace_id" { value = module.aws_observability_accelerator.managed_prometheus_workspace_id } -output "grafana_dashboard_urls" { - description = "URLs for dashboards created" - value = module.eks_monitoring.grafana_dashboard_urls -} - -output "grafana_prometheus_datasource_test" { - description = "Grafana save & test URL for Amazon Managed Prometheus workspace" - value = module.aws_observability_accelerator.grafana_prometheus_datasource_test -} - output "eks_cluster_version" { description = "EKS Cluster version" value = module.eks_monitoring.eks_cluster_version diff --git a/examples/existing-cluster-nginx/README.md b/examples/existing-cluster-nginx/README.md index 15f1c938..5ef1fc77 100644 --- a/examples/existing-cluster-nginx/README.md +++ b/examples/existing-cluster-nginx/README.md @@ -247,8 +247,6 @@ add this `managed_prometheus_region=xxx` and `managed_prometheus_workspace_id=ws | [aws\_region](#output\_aws\_region) | AWS Region | | [eks\_cluster\_id](#output\_eks\_cluster\_id) | EKS Cluster Id | | [eks\_cluster\_version](#output\_eks\_cluster\_version) | EKS Cluster version | -| [grafana\_dashboard\_urls](#output\_grafana\_dashboard\_urls) | URLs for dashboards created | -| [grafana\_prometheus\_datasource\_test](#output\_grafana\_prometheus\_datasource\_test) | Grafana save & test URL for Amazon Managed Prometheus workspace | | [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint | | [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID | diff --git a/examples/existing-cluster-nginx/main.tf b/examples/existing-cluster-nginx/main.tf index 1ca5da2c..9667cabd 100644 --- a/examples/existing-cluster-nginx/main.tf +++ b/examples/existing-cluster-nginx/main.tf @@ -75,10 +75,8 @@ module "eks_monitoring" { grafana_url = module.aws_observability_accelerator.managed_grafana_workspace_endpoint # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' - # the intention to publish is overruled depending upon whether grafana dashboard folder is created by the observability accelerator - enable_dashboards = module.aws_observability_accelerator.grafana_dashboard_folder_created ? var.enable_dashboards : false + enable_dashboards = var.enable_dashboards - dashboards_folder_id = module.aws_observability_accelerator.grafana_dashboards_folder_id managed_prometheus_workspace_id = module.aws_observability_accelerator.managed_prometheus_workspace_id managed_prometheus_workspace_endpoint = module.aws_observability_accelerator.managed_prometheus_workspace_endpoint diff --git a/examples/existing-cluster-nginx/outputs.tf b/examples/existing-cluster-nginx/outputs.tf index 586cffdc..ad1c3405 100644 --- a/examples/existing-cluster-nginx/outputs.tf +++ b/examples/existing-cluster-nginx/outputs.tf @@ -13,16 +13,6 @@ output "managed_prometheus_workspace_id" { value = module.aws_observability_accelerator.managed_prometheus_workspace_id } -output "grafana_dashboard_urls" { - description = "URLs for dashboards created" - value = module.eks_monitoring.grafana_dashboard_urls -} - -output "grafana_prometheus_datasource_test" { - description = "Grafana save & test URL for Amazon Managed Prometheus workspace" - value = module.aws_observability_accelerator.grafana_prometheus_datasource_test -} - output "eks_cluster_version" { description = "EKS Cluster version" value = module.eks_monitoring.eks_cluster_version diff --git a/examples/existing-cluster-with-base-and-infra/README.md b/examples/existing-cluster-with-base-and-infra/README.md index 7207e055..ac90123c 100644 --- a/examples/existing-cluster-with-base-and-infra/README.md +++ b/examples/existing-cluster-with-base-and-infra/README.md @@ -52,7 +52,7 @@ View the full documentation for this example [here](https://aws-observability.gi |------|-------------|------|---------|:--------:| | [aws\_region](#input\_aws\_region) | AWS Region | `string` | n/a | yes | | [eks\_cluster\_id](#input\_eks\_cluster\_id) | Name of the EKS cluster | `string` | `"eks-cluster-with-vpc"` | no | -| [enable\_dashboards](#input\_enable\_dashboards) | Enables or disables curated dashboards | `bool` | `true` | no | +| [enable\_dashboards](#input\_enable\_dashboards) | Enables or disables curated dashboards. Dashboards are managed by the Grafana Operator | `bool` | `true` | no | | [grafana\_api\_key](#input\_grafana\_api\_key) | API key for authorizing the Grafana provider to make changes to Amazon Managed Grafana | `string` | n/a | yes | | [managed\_grafana\_workspace\_id](#input\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana Workspace ID | `string` | n/a | yes | | [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Service for Prometheus Workspace ID | `string` | `""` | no | @@ -64,9 +64,4 @@ View the full documentation for this example [here](https://aws-observability.gi | [aws\_region](#output\_aws\_region) | AWS Region | | [eks\_cluster\_id](#output\_eks\_cluster\_id) | EKS Cluster Id | | [eks\_cluster\_version](#output\_eks\_cluster\_version) | EKS Cluster version | -| [grafana\_dashboard\_urls](#output\_grafana\_dashboard\_urls) | URLs for dashboards created | -| [grafana\_prometheus\_datasource\_test](#output\_grafana\_prometheus\_datasource\_test) | Grafana save & test URL for Amazon Managed Prometheus workspace | -| [managed\_grafana\_workspace\_id](#output\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana workspace ID | -| [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint | -| [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID | diff --git a/examples/existing-cluster-with-base-and-infra/main.tf b/examples/existing-cluster-with-base-and-infra/main.tf index f4102245..8e528a53 100644 --- a/examples/existing-cluster-with-base-and-infra/main.tf +++ b/examples/existing-cluster-with-base-and-infra/main.tf @@ -50,6 +50,8 @@ module "aws_observability_accelerator" { enable_alertmanager = true # reusing existing Amazon Managed Grafana workspace + # This is not needed anymore but kept here for a two step transition into + # removing the Terraform Grafana provider managed_grafana_workspace_id = var.managed_grafana_workspace_id grafana_api_key = var.grafana_api_key @@ -85,10 +87,8 @@ module "eks_monitoring" { grafana_url = module.aws_observability_accelerator.managed_grafana_workspace_endpoint # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' - # the intention to publish is overruled depending upon whether grafana dashboard folder is created by the observability accelerator - enable_dashboards = module.aws_observability_accelerator.grafana_dashboard_folder_created ? var.enable_dashboards : false + enable_dashboards = var.enable_dashboards - dashboards_folder_id = module.aws_observability_accelerator.grafana_dashboards_folder_id managed_prometheus_workspace_id = module.aws_observability_accelerator.managed_prometheus_workspace_id managed_prometheus_workspace_endpoint = module.aws_observability_accelerator.managed_prometheus_workspace_endpoint diff --git a/examples/existing-cluster-with-base-and-infra/outputs.tf b/examples/existing-cluster-with-base-and-infra/outputs.tf index b05adf70..f8b4d584 100644 --- a/examples/existing-cluster-with-base-and-infra/outputs.tf +++ b/examples/existing-cluster-with-base-and-infra/outputs.tf @@ -3,30 +3,6 @@ output "aws_region" { value = module.aws_observability_accelerator.aws_region } -output "managed_prometheus_workspace_endpoint" { - description = "Amazon Managed Prometheus workspace endpoint" - value = module.aws_observability_accelerator.managed_prometheus_workspace_endpoint -} - -output "managed_prometheus_workspace_id" { - description = "Amazon Managed Prometheus workspace ID" - value = module.aws_observability_accelerator.managed_prometheus_workspace_id -} - -output "managed_grafana_workspace_id" { - description = "Amazon Managed Grafana workspace ID" - value = module.aws_observability_accelerator.managed_grafana_workspace_id -} - -output "grafana_dashboard_urls" { - description = "URLs for dashboards created" - value = module.eks_monitoring.grafana_dashboard_urls -} - -output "grafana_prometheus_datasource_test" { - description = "Grafana save & test URL for Amazon Managed Prometheus workspace" - value = module.aws_observability_accelerator.grafana_prometheus_datasource_test -} output "eks_cluster_version" { description = "EKS Cluster version" value = module.eks_monitoring.eks_cluster_version diff --git a/examples/existing-cluster-with-base-and-infra/variables.tf b/examples/existing-cluster-with-base-and-infra/variables.tf index 3478fbca..6df0e2e5 100644 --- a/examples/existing-cluster-with-base-and-infra/variables.tf +++ b/examples/existing-cluster-with-base-and-infra/variables.tf @@ -27,7 +27,7 @@ variable "grafana_api_key" { } variable "enable_dashboards" { - description = "Enables or disables curated dashboards" + description = "Enables or disables curated dashboards. Dashboards are managed by the Grafana Operator" type = bool default = true } diff --git a/locals.tf b/locals.tf index 5da0a544..8e823659 100644 --- a/locals.tf +++ b/locals.tf @@ -4,6 +4,7 @@ data "aws_grafana_workspace" "this" { workspace_id = var.managed_grafana_workspace_id } + locals { # if region is not passed, we assume the current one amp_ws_region = coalesce(var.managed_prometheus_workspace_region, data.aws_region.current.name) @@ -11,7 +12,6 @@ locals { amp_ws_endpoint = "https://aps-workspaces.${local.amp_ws_region}.amazonaws.com/workspaces/${local.amp_ws_id}/" amg_ws_endpoint = "https://${data.aws_grafana_workspace.this.endpoint}" - amg_ws_id = var.managed_grafana_workspace_id name = "aws-observability-accelerator" } diff --git a/main.tf b/main.tf index 8911b422..287e502d 100644 --- a/main.tf +++ b/main.tf @@ -5,6 +5,11 @@ resource "aws_prometheus_workspace" "this" { tags = var.tags } +provider "grafana" { + url = local.amg_ws_endpoint + auth = var.grafana_api_key +} + resource "aws_prometheus_alert_manager_definition" "this" { count = var.enable_alertmanager ? 1 : 0 @@ -18,28 +23,3 @@ alertmanager_config: | - name: 'default' EOF } - -provider "grafana" { - url = local.amg_ws_endpoint - auth = var.grafana_api_key -} - -resource "grafana_data_source" "amp" { - count = var.create_prometheus_data_source ? 1 : 0 - type = "prometheus" - name = local.name - is_default = true - url = local.amp_ws_endpoint - json_data { - http_method = "GET" - sigv4_auth = true - sigv4_auth_type = "workspace-iam-role" - sigv4_region = local.amp_ws_region - } -} - -# dashboards -resource "grafana_folder" "this" { - count = var.create_dashboard_folder ? 1 : 0 - title = "Observability Accelerator Dashboards" -} diff --git a/modules/eks-monitoring/README.md b/modules/eks-monitoring/README.md index 8534f52e..503dd909 100644 --- a/modules/eks-monitoring/README.md +++ b/modules/eks-monitoring/README.md @@ -67,7 +67,6 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [custom\_metrics\_config](#input\_custom\_metrics\_config) | Configuration object to enable custom metrics collection |
object({
ports = list(number)
# paths = optional(list(string), ["/metrics"])
# list of samples to be dropped by label prefix, ex: go_ -> discards go_.*
dropped_series_prefixes = list(string)
})
|
{
"dropped_series_prefixes": [
"unspecified"
],
"ports": []
}
| no | -| [dashboards\_folder\_id](#input\_dashboards\_folder\_id) | Grafana folder ID for automatic dashboards | `string` | n/a | yes | | [eks\_cluster\_id](#input\_eks\_cluster\_id) | EKS Cluster Id | `string` | n/a | yes | | [enable\_alerting\_rules](#input\_enable\_alerting\_rules) | Enables or disables Managed Prometheus alerting rules | `bool` | `true` | no | | [enable\_amazon\_eks\_adot](#input\_enable\_amazon\_eks\_adot) | Enables the ADOT Operator on the EKS Cluster | `bool` | `true` | no | @@ -86,11 +85,12 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this | [enable\_tracing](#input\_enable\_tracing) | (Experimental) Enables tracing with AWS X-Ray. This changes the deploy mode of the collector to daemon set. Requirement: adot add-on <= 0.58-build.0 | `bool` | `false` | no | | [flux\_config](#input\_flux\_config) | FluxCD configuration |
object({
create_namespace = bool
k8s_namespace = string
helm_chart_name = string
helm_chart_version = string
helm_release_name = string
helm_repo_url = string
helm_settings = map(string)
helm_values = map(any)
})
|
{
"create_namespace": true,
"helm_chart_name": "flux2",
"helm_chart_version": "2.7.0",
"helm_release_name": "observability-fluxcd-addon",
"helm_repo_url": "https://fluxcd-community.github.io/helm-charts",
"helm_settings": {},
"helm_values": {},
"k8s_namespace": "flux-system"
}
| no | | [flux\_gitrepository\_branch](#input\_flux\_gitrepository\_branch) | Flux GitRepository Branch | `string` | `"main"` | no | +| [flux\_gitrepository\_name](#input\_flux\_gitrepository\_name) | Flux GitRepository name | `string` | `"aws-observability-accelerator"` | no | | [flux\_gitrepository\_url](#input\_flux\_gitrepository\_url) | Flux GitRepository URL | `string` | `"https://github.com/aws-observability/aws-observability-accelerator"` | no | -| [flux\_kustomization\_path](#input\_flux\_kustomization\_path) | Flux Kustomization Path | `string` | `"./artifacts/grafana-operator-manifests"` | no | -| [flux\_name](#input\_flux\_name) | Flux GitRepository and Kustomization Name | `string` | `"grafana-dashboards"` | no | -| [go\_config](#input\_go\_config) | Grafana Operator configuration |
object({
create_namespace = bool
helm_chart = string
helm_name = string
k8s_namespace = string
helm_release_name = string
helm_chart_version = string
})
|
{
"create_namespace": true,
"helm_chart": "oci://ghcr.io/grafana-operator/helm-charts/grafana-operator",
"helm_chart_version": "v5.0.0-rc1",
"helm_name": "grafana-operator",
"helm_release_name": "grafana-operator",
"k8s_namespace": "grafana-operator"
}
| no | -| [grafana\_api\_key](#input\_grafana\_api\_key) | Grafana API key for the Amazon Managed Grafana workspace | `string` | n/a | yes | +| [flux\_kustomization\_name](#input\_flux\_kustomization\_name) | Flux Kustomization name | `string` | `"grafana-dashboards-infrastructure"` | no | +| [flux\_kustomization\_path](#input\_flux\_kustomization\_path) | Flux Kustomization Path | `string` | `"./artifacts/grafana-operator-manifests/eks/infrastructure"` | no | +| [go\_config](#input\_go\_config) | Grafana Operator configuration |
object({
create_namespace = bool
helm_chart = string
helm_name = string
k8s_namespace = string
helm_release_name = string
helm_chart_version = string
})
|
{
"create_namespace": true,
"helm_chart": "oci://ghcr.io/grafana-operator/helm-charts/grafana-operator",
"helm_chart_version": "v5.0.0-rc3",
"helm_name": "grafana-operator",
"helm_release_name": "grafana-operator",
"k8s_namespace": "grafana-operator"
}
| no | +| [grafana\_api\_key](#input\_grafana\_api\_key) | Grafana API key for the Amazon Managed Grafana workspace | `string` | `""` | no | | [grafana\_cluster\_dashboard\_url](#input\_grafana\_cluster\_dashboard\_url) | Dashboard URL for Cluster Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/cluster.json"` | no | | [grafana\_kubelet\_dashboard\_url](#input\_grafana\_kubelet\_dashboard\_url) | Dashboard URL for Kubelet Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/kubelet.json"` | no | | [grafana\_namespace\_workloads\_dashboard\_url](#input\_grafana\_namespace\_workloads\_dashboard\_url) | Dashboard URL for Namespace Workloads Grafana Dashboard JSON | `string` | `"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/namespace-workloads.json"` | no | @@ -101,14 +101,14 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this | [helm\_config](#input\_helm\_config) | Helm Config for Prometheus | `any` | `{}` | no | | [irsa\_iam\_permissions\_boundary](#input\_irsa\_iam\_permissions\_boundary) | IAM permissions boundary for IRSA roles | `string` | `null` | no | | [irsa\_iam\_role\_path](#input\_irsa\_iam\_role\_path) | IAM role path for IRSA roles | `string` | `"/"` | no | -| [java\_config](#input\_java\_config) | Configuration object for Java/JMX monitoring |
object({
enable_alerting_rules = bool
enable_recording_rules = bool
scrape_sample_limit = number
})
|
{
"enable_alerting_rules": true,
"enable_recording_rules": true,
"scrape_sample_limit": 1000
}
| no | +| [java\_config](#input\_java\_config) | Configuration object for Java/JMX monitoring |
object({
enable_alerting_rules = bool
enable_recording_rules = bool
enable_dashboards = bool
scrape_sample_limit = number


flux_gitrepository_name = string
flux_gitrepository_url = string
flux_gitrepository_branch = string
flux_kustomization_name = string
flux_kustomization_path = string

grafana_dashboard_url = string

prometheus_metrics_endpoint = string
})
| `null` | no | | [ksm\_config](#input\_ksm\_config) | Kube State metrics configuration |
object({
create_namespace = bool
k8s_namespace = string
helm_chart_name = string
helm_chart_version = string
helm_release_name = string
helm_repo_url = string
helm_settings = map(string)
helm_values = map(any)

scrape_interval = string
scrape_timeout = string
})
|
{
"create_namespace": true,
"helm_chart_name": "kube-state-metrics",
"helm_chart_version": "4.24.0",
"helm_release_name": "kube-state-metrics",
"helm_repo_url": "https://prometheus-community.github.io/helm-charts",
"helm_settings": {},
"helm_values": {},
"k8s_namespace": "kube-system",
"scrape_interval": "60s",
"scrape_timeout": "15s"
}
| no | | [logs\_config](#input\_logs\_config) | Configuration object for logs collection |
object({
cw_log_retention_days = number
})
|
{
"cw_log_retention_days": 90
}
| no | | [managed\_prometheus\_workspace\_endpoint](#input\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus Workspace Endpoint | `string` | `""` | no | | [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus Workspace ID | `string` | `null` | no | | [managed\_prometheus\_workspace\_region](#input\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus Workspace's Region | `string` | `null` | no | | [ne\_config](#input\_ne\_config) | Node exporter configuration |
object({
create_namespace = bool
k8s_namespace = string
helm_chart_name = string
helm_chart_version = string
helm_release_name = string
helm_repo_url = string
helm_settings = map(string)
helm_values = map(any)

scrape_interval = string
scrape_timeout = string
})
|
{
"create_namespace": true,
"helm_chart_name": "prometheus-node-exporter",
"helm_chart_version": "4.14.0",
"helm_release_name": "prometheus-node-exporter",
"helm_repo_url": "https://prometheus-community.github.io/helm-charts",
"helm_settings": {},
"helm_values": {},
"k8s_namespace": "prometheus-node-exporter",
"scrape_interval": "60s",
"scrape_timeout": "60s"
}
| no | -| [nginx\_config](#input\_nginx\_config) | Configuration object for NGINX monitoring |
object({
enable_alerting_rules = bool
scrape_sample_limit = number
prometheus_metrics_endpoint = string
})
|
{
"enable_alerting_rules": true,
"prometheus_metrics_endpoint": "metrics",
"scrape_sample_limit": 1000
}
| no | +| [nginx\_config](#input\_nginx\_config) | Configuration object for NGINX monitoring |
object({
enable_alerting_rules = bool
enable_recording_rules = bool
enable_dashboards = bool
scrape_sample_limit = number

flux_gitrepository_name = string
flux_gitrepository_url = string
flux_gitrepository_branch = string
flux_kustomization_name = string
flux_kustomization_path = string

grafana_dashboard_url = string

prometheus_metrics_endpoint = string
})
| `null` | no | | [prometheus\_config](#input\_prometheus\_config) | Controls default values such as scrape interval, timeouts and ports globally |
object({
global_scrape_interval = string
global_scrape_timeout = string
})
|
{
"global_scrape_interval": "60s",
"global_scrape_timeout": "15s"
}
| no | | [tags](#input\_tags) | Additional tags (e.g. `map('BusinessUnit`,`XYZ`) | `map(string)` | `{}` | no | | [target\_secret\_name](#input\_target\_secret\_name) | Target secret in Kubernetes to store the Grafana API Key Secret | `string` | `"grafana-admin-credentials"` | no | @@ -121,5 +121,4 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this |------|-------------| | [eks\_cluster\_id](#output\_eks\_cluster\_id) | EKS Cluster Id | | [eks\_cluster\_version](#output\_eks\_cluster\_version) | EKS Cluster version | -| [grafana\_dashboard\_urls](#output\_grafana\_dashboard\_urls) | URLs for dashboards created | diff --git a/modules/eks-monitoring/add-ons/external-secrets/README.md b/modules/eks-monitoring/add-ons/external-secrets/README.md index 8d13e60b..0506f8d0 100644 --- a/modules/eks-monitoring/add-ons/external-secrets/README.md +++ b/modules/eks-monitoring/add-ons/external-secrets/README.md @@ -32,8 +32,7 @@ This deploys an EKS Cluster with the External Secrets Operator. The cluster is p |------|------| | [aws_iam_policy.cluster_secretstore](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [aws_kms_key.secrets](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | resource | -| [aws_secretsmanager_secret.secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret) | resource | -| [aws_secretsmanager_secret_version.secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret_version) | resource | +| [aws_ssm_parameter.secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_parameter) | resource | | [kubectl_manifest.cluster_secretstore](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | | [kubectl_manifest.secret](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | diff --git a/modules/eks-monitoring/add-ons/external-secrets/main.tf b/modules/eks-monitoring/add-ons/external-secrets/main.tf index 8633d7b3..dd3ed2c4 100644 --- a/modules/eks-monitoring/add-ons/external-secrets/main.tf +++ b/modules/eks-monitoring/add-ons/external-secrets/main.tf @@ -36,12 +36,13 @@ resource "aws_iam_policy" "cluster_secretstore" { { "Effect": "Allow", "Action": [ - "secretsmanager:GetResourcePolicy", - "secretsmanager:GetSecretValue", - "secretsmanager:DescribeSecret", - "secretsmanager:ListSecretVersionIds" + "ssm:DescribeParameters", + "ssm:GetParameter", + "ssm:GetParameters", + "ssm:GetParametersByPath", + "ssm:GetParameterHistory" ], - "Resource": "${aws_secretsmanager_secret.secret.arn}" + "Resource": "${aws_ssm_parameter.secret.arn}" }, { "Effect": "Allow", @@ -64,7 +65,7 @@ metadata: spec: provider: aws: - service: SecretsManager + service: ParameterStore region: ${data.aws_region.current.name} auth: jwt: @@ -75,16 +76,15 @@ YAML depends_on = [module.external_secrets] } -resource "aws_secretsmanager_secret" "secret" { - recovery_window_in_days = 0 - kms_key_id = aws_kms_key.secrets.arn -} - -resource "aws_secretsmanager_secret_version" "secret" { - secret_id = aws_secretsmanager_secret.secret.id - secret_string = jsonencode({ +resource "aws_ssm_parameter" "secret" { + name = "/terraform-accelerator/grafana-api-key" + description = "SSM Secret to store grafana API Key" + type = "SecureString" + value = jsonencode({ GF_SECURITY_ADMIN_APIKEY = var.grafana_api_key }) + key_id = aws_kms_key.secrets.id + overwrite = true } resource "kubectl_manifest" "secret" { @@ -103,7 +103,7 @@ spec: name: ${var.target_secret_name} dataFrom: - extract: - key: ${aws_secretsmanager_secret.secret.name} + key: ${aws_ssm_parameter.secret.name} YAML depends_on = [module.external_secrets] } diff --git a/modules/eks-monitoring/dashboards.tf b/modules/eks-monitoring/dashboards.tf index 827fa9fd..f8260e3a 100644 --- a/modules/eks-monitoring/dashboards.tf +++ b/modules/eks-monitoring/dashboards.tf @@ -1,9 +1,11 @@ resource "kubectl_manifest" "flux_gitrepository" { - yaml_body = < [aws](#provider\_aws) | >= 4.0.0 | -| [grafana](#provider\_grafana) | >= 1.25.0 | +| [kubectl](#provider\_kubectl) | >= 1.14 | ## Modules @@ -34,21 +34,15 @@ No modules. |------|------| | [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | | [aws_prometheus_rule_group_namespace.recording_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | -| [grafana_dashboard.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/dashboard) | resource | +| [kubectl_manifest.flux_kustomization](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [dashboards\_folder\_id](#input\_dashboards\_folder\_id) | Grafana folder ID for automatic dashboards | `string` | n/a | yes | -| [enable\_alerting\_rules](#input\_enable\_alerting\_rules) | Enables or disables Managed Prometheus alerting rules | `bool` | `true` | no | -| [enable\_dashboards](#input\_enable\_dashboards) | Enables or disables curated dashboards | `bool` | `true` | no | -| [enable\_recording\_rules](#input\_enable\_recording\_rules) | Enables or disables Managed Prometheus recording rules | `bool` | `true` | no | -| [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus Workspace ID | `string` | `null` | no | +| [pattern\_config](#input\_pattern\_config) | Configuration object for Java/JMX monitoring |
object({
enable_alerting_rules = bool
enable_recording_rules = bool
scrape_sample_limit = number

enable_recording_rules = bool

enable_dashboards = bool

flux_gitrepository_name = string
flux_gitrepository_url = string
flux_gitrepository_branch = string
flux_kustomization_name = string
flux_kustomization_path = string

managed_prometheus_workspace_id = string
managed_prometheus_workspace_region = string
managed_prometheus_workspace_endpoint = string

grafana_url = string
grafana_dashboard_url = string
})
| n/a | yes | ## Outputs -| Name | Description | -|------|-------------| -| [grafana\_dashboard\_urls](#output\_grafana\_dashboard\_urls) | URLs for dashboards created | +No outputs. diff --git a/modules/eks-monitoring/patterns/java/dashboards/default.json b/modules/eks-monitoring/patterns/java/dashboards/default.json deleted file mode 100644 index a73897df..00000000 --- a/modules/eks-monitoring/patterns/java/dashboards/default.json +++ /dev/null @@ -1,1840 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "Dashboard for JVM metrics with Prometheus / JMX Exporter", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 3457, - "graphTooltip": 0, - "id": 403, - "iteration": 1675938645502, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 22, - "panels": [], - "title": "Headlines", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 1 - }, - "id": 17, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "avg_over_time(java_lang_operatingsystem_systemcpuload{job=~\"$job\"}[5m]) * 100", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 30, - "tags": [] - } - ], - "title": "CPU System Load (5m avg)", - "type": "gauge" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 1 - }, - "id": 19, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "java_lang_operatingsystem_totalphysicalmemorysize{job=~\"$job\"}", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 60, - "tags": [] - } - ], - "title": "Total RAM", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 1 - }, - "id": 20, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "java_lang_operatingsystem_totalswapspacesize{job=~\"$job\"}", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 60, - "tags": [] - } - ], - "title": "Total SWAP", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 1 - }, - "id": 18, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "java_lang_operatingsystem_availableprocessors{job=~\"$job\"}", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 60, - "tags": [] - } - ], - "title": "CPU Cores", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 24, - "panels": [], - "title": "General", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 6 - }, - "id": 8, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "java_lang_operatingsystem_systemcpuload{job=~\"$job\"} * 100", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "system", - "refId": "B", - "step": 20 - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "java_lang_operatingsystem_processcpuload{job=~\".*jmx\"} * 100", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "process", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - } - ], - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 6 - }, - "id": 2, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "jvm_memory_bytes_used{job=~\".*jmx\"}/ ignoring(area) group_left java_lang_operatingsystem_totalphysicalmemorysize{job=~\".*jmx\"} *100", - "format": "time_series", - "interval": "", - "intervalFactor": 5, - "legendFormat": "{{area}} memory", - "metric": "jvm_memory_bytes_used", - "refId": "A", - "step": 50 - } - ], - "title": "Memory Heap /NonHeap", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 6 - }, - "id": 10, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "jvm_memory_pool_bytes_used{job=~\".*jmx\"}", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pool}}", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - } - ], - "title": "Memory Pool Used", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 13 - }, - "id": 3, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "jvm_threads_current{job=~\".*jmx\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 5, - "legendFormat": "current", - "metric": "jvm_threads_current", - "refId": "A", - "step": 50 - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "jvm_threads_daemon{job=~\".*jmx\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 5, - "legendFormat": "daemon", - "metric": "jvm_threads_daemon", - "refId": "B", - "step": 50 - } - ], - "title": "Threads used", - "type": "timeseries" - }, - { - "description": "jvm_classes_loaded", - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 13 - }, - "id": 4, - "interval": "60", - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(jvm_classes_loaded_total{}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 5, - "legendFormat": "{{container_name}}", - "metric": "jvm_classes_loaded", - "refId": "A", - "step": 50 - } - ], - "title": "Class loading", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 13 - }, - "id": 7, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "java_lang_operatingsystem_openfiledescriptorcount{job=~\"$job\"}", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{container_name}}", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - } - ], - "title": "Open File Descriptors", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 26, - "panels": [], - "title": "Catalina", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "catalina_manager_activesessions{job=~\"$job\"}", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}} active", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "catalina_manager_rejectedsessions{job=~\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}} rejected", - "refId": "C", - "step": 20 - } - ], - "title": "Catalina Manager Sessions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 14, - "interval": "60", - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(catalina_globalrequestprocessor_requestcount_total{job=~\"$job\"}[$__rate_interval])", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}} count", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(catalina_globalrequestprocessor_errorcount_total{job=~\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}} error", - "refId": "B", - "step": 20 - } - ], - "title": "Catalina Request Processor requests", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 15, - "interval": "60", - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(catalina_globalrequestprocessor_processingtime_total{job=~\"$job\"}[$__rate_interval])", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - } - ], - "title": "Catalina Request Processing Time", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 2, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sent/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 28 - }, - "id": 13, - "interval": "60", - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(catalina_globalrequestprocessor_bytesreceived_total{job=~\"$job\"}[$__rate_interval])", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}} recv", - "orderByTime": "ASC", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [] - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(catalina_globalrequestprocessor_bytessent_total{job=~\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}} sent", - "refId": "B", - "step": 20 - } - ], - "title": "Catalina Request Processor bytes", - "type": "timeseries" - } - ], - "refresh": "30s", - "schemaVersion": 35, - "style": "dark", - "tags": [ - "JVM", - "prometheus", - "JMX" - ], - "templating": { - "list": [ - { - "current": { - "selected": true, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "Data Source", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": "", - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "definition": "label_values(job)", - "hide": 0, - "includeAll": true, - "label": "", - "multi": true, - "name": "job", - "options": [], - "query": { - "query": "label_values(job)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Java/JMX", - "uid": "m9mHfAy7ks", - "version": 4, - "weekStart": "" -} diff --git a/modules/eks-monitoring/patterns/java/main.tf b/modules/eks-monitoring/patterns/java/main.tf index 78ca387f..ac286fb1 100644 --- a/modules/eks-monitoring/patterns/java/main.tf +++ b/modules/eks-monitoring/patterns/java/main.tf @@ -1,7 +1,8 @@ resource "aws_prometheus_rule_group_namespace" "recording_rules" { - count = var.enable_recording_rules ? 1 : 0 + count = var.pattern_config.enable_recording_rules ? 1 : 0 + name = "accelerator-java-rules" - workspace_id = var.managed_prometheus_workspace_id + workspace_id = var.pattern_config.managed_prometheus_workspace_id data = < [terraform](#requirement\_terraform) | >= 1.1.0 | | [aws](#requirement\_aws) | >= 4.0.0 | | [grafana](#requirement\_grafana) | >= 1.25.0 | +| [kubectl](#requirement\_kubectl) | >= 1.14 | | [kubernetes](#requirement\_kubernetes) | >= 2.10 | ## Providers @@ -22,7 +23,7 @@ It provides the following resources: | Name | Version | |------|---------| | [aws](#provider\_aws) | >= 4.0.0 | -| [grafana](#provider\_grafana) | >= 1.25.0 | +| [kubectl](#provider\_kubectl) | >= 1.14 | ## Modules @@ -33,19 +34,15 @@ No modules. | Name | Type | |------|------| | [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | -| [grafana_dashboard.workloads](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/dashboard) | resource | +| [kubectl_manifest.flux_kustomization](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [dashboards\_folder\_id](#input\_dashboards\_folder\_id) | Grafana folder ID for automatic dashboards | `string` | n/a | yes | -| [enable\_alerting\_rules](#input\_enable\_alerting\_rules) | Enables or disables Managed Prometheus alerting rules | `bool` | `true` | no | -| [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus Workspace ID | `string` | `null` | no | +| [pattern\_config](#input\_pattern\_config) | Configuration object for Java/JMX monitoring |
object({
enable_alerting_rules = bool
enable_recording_rules = bool
scrape_sample_limit = number

enable_recording_rules = bool

enable_dashboards = bool

flux_gitrepository_name = string
flux_gitrepository_url = string
flux_gitrepository_branch = string
flux_kustomization_name = string
flux_kustomization_path = string

managed_prometheus_workspace_id = string
managed_prometheus_workspace_region = string
managed_prometheus_workspace_endpoint = string

grafana_url = string
grafana_dashboard_url = string
})
| n/a | yes | ## Outputs -| Name | Description | -|------|-------------| -| [grafana\_dashboard\_urls](#output\_grafana\_dashboard\_urls) | URLs for dashboards created | +No outputs. diff --git a/modules/eks-monitoring/patterns/nginx/dashboards/nginx.json b/modules/eks-monitoring/patterns/nginx/dashboards/nginx.json deleted file mode 100644 index f0ea4dc6..00000000 --- a/modules/eks-monitoring/patterns/nginx/dashboards/nginx.json +++ /dev/null @@ -1,1442 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - }, - { - "datasource": "$datasource", - "enable": true, - "expr": "sum(changes(nginx_ingress_controller_config_last_reload_successful_timestamp_seconds{instance!=\"unknown\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[30s])) by (controller_class)", - "hide": false, - "iconColor": "rgba(255, 96, 96, 1)", - "limit": 100, - "name": "Config Reloads", - "showIn": 0, - "step": "30s", - "tagKeys": "controller_class", - "tags": [], - "titleFormat": "Config Reloaded", - "type": "tags" - } - ] - }, - "description": "Ingress-nginx supports a rich collection of prometheus metrics. If you have prometheus and grafana installed on your cluster then prometheus will already be scraping this data due to the scrape annotation on the deployment.", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 9614, - "graphTooltip": 0, - "id": 8, - "iteration": 1663262425658, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "id": 20, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "expr": "round(sum(irate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[2m])), 0.001)", - "format": "time_series", - "intervalFactor": 1, - "refId": "A", - "step": 4 - } - ], - "title": "Controller Request Volume", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 6, - "x": 6, - "y": 0 - }, - "id": 82, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "expr": "sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A", - "step": 4 - } - ], - "title": "Controller Connections", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(245, 54, 54, 0.9)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 95 - }, - { - "color": "rgba(50, 172, 45, 0.97)", - "value": 99 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 6, - "x": 12, - "y": 0 - }, - "id": 21, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "expr": "sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",status!~\"[4-5].*\"}[2m])) / sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[2m]))", - "format": "time_series", - "intervalFactor": 1, - "refId": "A", - "step": 4 - } - ], - "title": "Controller Success Rate (non-4|5xx responses)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 18, - "y": 0 - }, - "id": 81, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "expr": "avg(nginx_ingress_controller_success{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A", - "step": 4 - } - ], - "title": "Config Reloads", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "rgb(31, 120, 193)", - "mode": "fixed" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 0 - }, - "id": 83, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "8.4.7", - "targets": [ - { - "expr": "count(nginx_ingress_controller_config_last_reload_successful{controller_pod=~\"$controller\",controller_namespace=~\"$namespace\"} == 0)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A", - "step": 4 - } - ], - "title": "Last Config Failed", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 3 - }, - "height": "200px", - "hiddenSeries": false, - "id": 86, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": false, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.4.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(sum(irate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress), 0.001)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ ingress }}", - "metric": "network", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Ingress Request Volume", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "reqps", - "logBase": 1, - "show": true - }, - { - "format": "Bps", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": { - "max - istio-proxy": "#890f02", - "max - master": "#bf1b00", - "max - prometheus": "#bf1b00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "decimals": 2, - "editable": false, - "error": false, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 3 - }, - "hiddenSeries": false, - "id": 87, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 300, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.4.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",ingress=~\"$ingress\",status!~\"[4-5].*\"}[2m])) by (ingress) / sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", - "format": "time_series", - "instant": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ ingress }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Ingress Success Rate (non-4|5xx responses)", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 1, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 10 - }, - "height": "200px", - "hiddenSeries": false, - "id": 32, - "isNew": true, - "legend": { - "alignAsTable": false, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.4.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (irate (nginx_ingress_controller_request_size_sum{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", - "format": "time_series", - "instant": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Received", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "- sum (irate (nginx_ingress_controller_response_size_sum{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Sent", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Network I/O pressure", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "logBase": 1, - "show": true - }, - { - "format": "Bps", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": { - "max - istio-proxy": "#890f02", - "max - master": "#bf1b00", - "max - prometheus": "#bf1b00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "decimals": 2, - "editable": false, - "error": false, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 10 - }, - "hiddenSeries": false, - "id": 77, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.4.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(nginx_ingress_controller_nginx_process_resident_memory_bytes{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}) ", - "format": "time_series", - "instant": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "nginx", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Average Memory Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": { - "max - istio-proxy": "#890f02", - "max - master": "#bf1b00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "decimals": 3, - "editable": false, - "error": false, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 10 - }, - "height": "", - "hiddenSeries": false, - "id": 79, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.4.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (rate (nginx_ingress_controller_nginx_process_cpu_seconds_total{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m])) ", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "nginx", - "metric": "container_cpu", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt" - } - ], - "timeRegions": [], - "title": "Average CPU Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "cores", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "columns": [], - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 16 - }, - "hideTimeOverride": false, - "id": 75, - "links": [], - "pageSize": 7, - "repeatDirection": "h", - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": true - }, - "styles": [ - { - "alias": "Ingress", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ingress", - "preserveFormat": false, - "sanitize": false, - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Requests", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value #A", - "thresholds": [ - "" - ], - "type": "number", - "unit": "ops" - }, - { - "alias": "Errors", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value #B", - "thresholds": [], - "type": "number", - "unit": "ops" - }, - { - "alias": "P50 Latency", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "pattern": "Value #C", - "thresholds": [], - "type": "number", - "unit": "dtdurations" - }, - { - "alias": "P90 Latency", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value #D", - "thresholds": [], - "type": "number", - "unit": "dtdurations" - }, - { - "alias": "P99 Latency", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value #E", - "thresholds": [], - "type": "number", - "unit": "dtdurations" - }, - { - "alias": "IN", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value #F", - "thresholds": [ - "" - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "OUT", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #G", - "thresholds": [], - "type": "number", - "unit": "Bps" - } - ], - "targets": [ - { - "expr": "histogram_quantile(0.50, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{ ingress }}", - "refId": "C" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{ ingress }}", - "refId": "D" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{ destination_service }}", - "refId": "E" - }, - { - "expr": "sum(irate(nginx_ingress_controller_request_size_sum{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ ingress }}", - "refId": "F" - }, - { - "expr": "sum(irate(nginx_ingress_controller_response_size_sum{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{ ingress }}", - "refId": "G" - } - ], - "title": "Ingress Percentile Response Times and Transfer Rates", - "transform": "table", - "type": "table-old" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 24 - }, - "height": "1024", - "id": 85, - "links": [], - "pageSize": 7, - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "TTL", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Current", - "thresholds": [ - "0", - "691200" - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "align": "auto", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "avg(nginx_ingress_controller_ssl_expire_time_seconds{kubernetes_pod_name=~\"$controller\",namespace=~\"$namespace\",ingress=~\"$ingress\"}) by (host) - time()", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ host }}", - "metric": "gke_letsencrypt_cert_expiration", - "refId": "A", - "step": 1 - } - ], - "title": "Ingress Certificate Expiry", - "transform": "timeseries_aggregations", - "type": "table-old" - } - ], - "refresh": "5s", - "schemaVersion": 35, - "style": "dark", - "tags": [ - "nginx", - "workloads" - ], - "templating": { - "list": [ - { - "allValue": ".*", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": { - "query": "label_values(nginx_ingress_controller_config_hash, controller_namespace)", - "refId": "aws-observability-accelerator-namespace-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Controller Class", - "multi": false, - "name": "controller_class", - "options": [], - "query": { - "query": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\"}, controller_class) ", - "refId": "aws-observability-accelerator-controller_class-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "definition": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\",controller_class=~\"$controller_class\"}, controller_pod) ", - "hide": 0, - "includeAll": true, - "label": "Controller", - "multi": false, - "name": "controller", - "options": [], - "query": { - "query": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\",controller_class=~\"$controller_class\"}, controller_pod) ", - "refId": "aws-observability-accelerator-controller-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "definition": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller_pod=~\"$controller\"}, ingress) ", - "hide": 0, - "includeAll": true, - "label": "Ingress", - "multi": false, - "name": "ingress", - "options": [], - "query": { - "query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller_pod=~\"$controller\"}, ingress) ", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 2, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": false, - "text": "aws-observability-accelerator", - "value": "aws-observability-accelerator" - }, - "description": "datasource for prometheus", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "2m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "NGINX", - "uid": "nginx", - "version": 5, - "weekStart": "" -} diff --git a/modules/eks-monitoring/patterns/nginx/main.tf b/modules/eks-monitoring/patterns/nginx/main.tf index c2dd5754..876d4bab 100644 --- a/modules/eks-monitoring/patterns/nginx/main.tf +++ b/modules/eks-monitoring/patterns/nginx/main.tf @@ -1,8 +1,8 @@ resource "aws_prometheus_rule_group_namespace" "alerting_rules" { - count = var.enable_alerting_rules ? 1 : 0 + count = var.pattern_config.enable_alerting_rules ? 1 : 0 name = "accelerator-nginx-alerting" - workspace_id = var.managed_prometheus_workspace_id + workspace_id = var.pattern_config.managed_prometheus_workspace_id data = <