From 04ddb886e98d589ef6cb2292890bf568b53247aa Mon Sep 17 00:00:00 2001 From: Dan Miller Date: Wed, 4 Dec 2024 16:00:24 -0500 Subject: [PATCH] DEV-2741: Cold Start Improvements and EKS FAQs (#717) --- docs/layers/accounts/account-baseline.mdx | 85 ++++++++++++++++++- docs/layers/accounts/deploy-accounts.mdx | 8 +- .../accounts/prepare-aws-organization.mdx | 7 +- docs/layers/eks/faq.mdx | 75 ++++++++++++++++ package-lock.json | 7 +- 5 files changed, 170 insertions(+), 12 deletions(-) diff --git a/docs/layers/accounts/account-baseline.mdx b/docs/layers/accounts/account-baseline.mdx index fa28871a9..4be7936a4 100644 --- a/docs/layers/accounts/account-baseline.mdx +++ b/docs/layers/accounts/account-baseline.mdx @@ -1,7 +1,7 @@ --- title: "Deploy CloudTrail and ECR" sidebar_label: "Deploy Account Baseline" -sidebar_position: 4 +sidebar_position: 5 --- import Intro from '@site/src/components/Intro'; import KeyPoints from '@site/src/components/KeyPoints'; @@ -17,6 +17,7 @@ Now that all the accounts have been deployed, we need to finalize the setup of t | Steps | Actions | | -------------------------- | ----------------------------------- | | Deploy baseline components | `atmos workflow deploy -f baseline` | +| Deploy account budgets | Create Slack Webhook and `atmos workflow deploy -f accounts` | @@ -26,4 +27,86 @@ Now that all the accounts have been deployed, we need to finalize the setup of t + + + ## (Optional) Deploy Account Budgets + + Budgets are an optional feature that can be enabled with [the `account-settings` component](/components/library/aws/account-settings/) for the Organization as a whole or for individual accounts. Budgets *do not restrict spending* but provide visibility into spending and can be used to set alerts when spending exceeds a certain threshold. We recommend using a dedicated Slack channel for these alerts, which we will set up with a webhook. + + + 1. [Create a Slack Webhook](https://api.slack.com/messaging/webhooks). 
Take note of the Webhook URL and the final name of the Slack channel. The Slack channel name is case-sensitive and must match the name of the channel exactly as it appears in the owning Slack server (not the name shown if it was renamed as a shared channel). + 2. Update the `account-settings` component with the Slack Webhook URL and the Slack channel name. + ```yaml + # stacks/catalog/account-settings.yaml + components: + terraform: + account-settings: + vars: + budgets_enabled: true + budgets_notifications_enabled: true + budgets_slack_webhook_url: https://url.slack.com/abcd/1234 + budgets_slack_username: AWS Budgets + budgets_slack_channel: aws-budgets-notifications + ``` + 3. **To enable budgets for the entire organization**, update `account-settings` in the same account as the Organization root account, typically `core-root`. This budget will include the total spending of all accounts in the Organization. + ```yaml + # stacks/orgs/acme/core/root/global-region/baseline.yaml + import: + - catalog/account-settings + + components: + terraform: + account-settings: + vars: + # Budgets in `root` apply to the Organization as a whole + budgets: + - name: Total AWS Organization Cost per Month + budget_type: COST + limit_amount: 10000 + limit_unit: USD + time_unit: MONTHLY + notification: + - comparison_operator: GREATER_THAN + notification_type: FORECASTED + threshold_type: PERCENTAGE + threshold: 80 + subscribers: + - slack + - comparison_operator: GREATER_THAN + notification_type: FORECASTED + threshold_type: PERCENTAGE + threshold: 100 + subscribers: + - slack + - comparison_operator: GREATER_THAN + notification_type: ACTUAL + threshold_type: PERCENTAGE + threshold: 100 + subscribers: + - slack + ``` + 4. **To enable budgets for individual accounts**, update `account-settings` either in the specific account you want to enable budgets for, or in the default settings shared by all `account-settings` components so that it applies to every account. This budget will include the spending of the given account only. 
+ ```yaml + # stacks/catalog/account-settings.yaml + components: + terraform: + account-settings: + vars: + ... + budgets: + - name: 1000-total-monthly + budget_type: COST + limit_amount: "1000" + limit_unit: USD + time_unit: MONTHLY + - name: s3-3GB-limit-monthly + budget_type: USAGE + limit_amount: "3" + limit_unit: GB + time_unit: MONTHLY + ``` + 5. Finally, reapply `account-settings` in any changed account to apply the new settings. + + + diff --git a/docs/layers/accounts/deploy-accounts.mdx b/docs/layers/accounts/deploy-accounts.mdx index fcdd9f0c2..4fcd8b31b 100644 --- a/docs/layers/accounts/deploy-accounts.mdx +++ b/docs/layers/accounts/deploy-accounts.mdx @@ -48,17 +48,15 @@ This step-by-step process outlines how to deploy AWS accounts using `atmos` work - ## Configure Root Account as Organization + ## Confirm the Root Account is configured as an Organization - Before performing the "Deploy Accounts" step, the root account needs to be configured as an AWS Organization. - - This process also enables [AWS RAM for Organizations](https://docs.aws.amazon.com/organizations/latest/userguide/orgs_manage_policies_enable-ram.html) via a CLI command, which is required for connecting the Organization. + The previous step will create the AWS Organization and configure the `core-root` account as the "root" account. Take the time now to verify that the root account is configured as an AWS Organization and that [AWS RAM for Organizations](https://docs.aws.amazon.com/organizations/latest/userguide/orgs_manage_policies_enable-ram.html) is enabled, which is required for connecting the Organization. ## Raise Account Limits - To deploy all accounts, we need to request an increase of the Account Quota from AWS support, which requires an AWS Organization to be created first. + If you haven't already completed the Account Quota increase, now is the time to do so. 
To deploy all accounts, we need to request an increase of the Account Quota from AWS support, which requires an AWS Organization to be created first. From the `root` account (not `SuperAdmin`), increase the [account quota to 20+](https://us-east-1.console.aws.amazon.com/servicequotas/home/services/organizations/quotas) for the Cloud Posse reference architecture, or more depending on your business use-case diff --git a/docs/layers/accounts/prepare-aws-organization.mdx b/docs/layers/accounts/prepare-aws-organization.mdx index cd74382e8..951a35afc 100644 --- a/docs/layers/accounts/prepare-aws-organization.mdx +++ b/docs/layers/accounts/prepare-aws-organization.mdx @@ -55,8 +55,13 @@ From the root account: For billing users, you need to enable IAM access. As the root user [open up the account settings for AWS Billing](https://us-east-1.console.aws.amazon.com/billing/home?region=us-east-1#/Account), then scroll to the section "IAM user and role access to Billing information" and enable it. -3. ### Enable Regions (Optional) +1. ### Enable Regions (Optional) The 17 original AWS regions are enabled by default. If you are using a region that is not enabled by default (such as Middle East/Bahrain), you need to take extra steps. For details, see [the detailed documentation](/layers/accounts/tutorials/manual-configuration/#optional-enable-regions) +1. ### Prepare for Account Quota Increase + In order to deploy all accounts, you need to request an increase of the Account Quota from AWS support. This requires an AWS Organization to be created first, which we will create with Terraform in the [Deploy Accounts guide](/layers/accounts/deploy-accounts/#-prepare-account-deployment). This request can take a few days to process, so it's important to get it started early so that it doesn't become a blocker. + + At this time we don't need to request the increase, but we should be prepared to do so as soon as the AWS Organization is created. 
+ For more details, see diff --git a/docs/layers/eks/faq.mdx b/docs/layers/eks/faq.mdx index 968215e3a..654d25db9 100644 --- a/docs/layers/eks/faq.mdx +++ b/docs/layers/eks/faq.mdx @@ -38,3 +38,78 @@ launch and scale runners for GitHub automatically. For more on how to set up ARC, see the [GitHub Action Runners setup docs for EKS](/layers/github-actions/eks-github-actions-controller/). + +## Common Connectivity Issues and Solutions + +If you're having trouble connecting to your EKS cluster, follow these comprehensive steps to diagnose and resolve the issue: + + +**1. Test Basic Connectivity** + +First, test basic connectivity to your cluster endpoint. This helps isolate whether the issue is with basic network connectivity or something more specific: + +```bash +curl -fsSk --max-time 5 "https://CLUSTER_ENDPOINT/healthz" +``` + +If this test fails, it indicates a fundamental connectivity issue that needs to be addressed before proceeding to more specific troubleshooting. + +**2. Check Node Communication** + +If worker nodes aren't joining the cluster, follow these detailed steps: + +- Verify that the add-on stack file (e.g., `stacks/catalog/eks/mixins/k8s-1-29.yaml`) is imported into your stack. +- Verify cluster add-ons are properly configured for your EKS version. + - Check CoreDNS is running + - Verify kube-proxy is deployed + - Ensure VPC CNI is correctly configured +- Confirm the rendered component stack configuration. + +```bash +atmos describe component eks/cluster -s <stack> +``` + +**3. 
Verify Network Configuration** + +- Security Groups: + - Control plane security group must allow port 443 inbound from worker nodes + - Worker node security group must allow all traffic between nodes + - Verify outbound internet access for pulling container images +- Subnet Routes: + - Verify route tables have paths to all required destinations + - Check for conflicting or overlapping CIDR ranges + - Ensure NAT Gateway is properly configured for private subnets +- Transit Gateway: + - Verify TGW attachments are active and associated + - Check TGW route tables for correct propagation + - Confirm cross-account routing if applicable +- Private Subnets Configuration: + - Set `cluster_private_subnets_only: true` in your configuration + - Ensure private subnets have proper NAT Gateway routing + +**4. VPN Connectivity** + +When accessing via AWS Client VPN, verify these configurations: + +- VPN Routes: + - Check route table entries for EKS VPC CIDR + - Verify routes are active and not in pending state + - Confirm no conflicting routes exist +- Subnet Associations: + - Ensure VPN endpoint is associated with correct subnets + - Verify subnet route tables include VPN CIDR range +- Authorization Rules: + - Check network ACLs allow VPN CIDR range + - Verify security group rules permit VPN traffic + - Confirm IAM roles have necessary permissions + +After making any changes, have clients disconnect and reconnect to receive updated routes. + +**5. 
Advanced Diagnostics** + +- AWS Reachability Analyzer: + - Enable cross-account analysis for VPC peering or TGW connections + - Test from VPN ENI to cluster endpoint + - Test return path from cluster to VPN ENI + + diff --git a/package-lock.json b/package-lock.json index 3f666001f..8d20ec124 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6428,8 +6428,8 @@ "license": "MIT" }, "node_modules/custom-loaders": { - "resolved": "plugins/custom-loaders", - "link": true + "version": "0.0.0", + "resolved": "file:plugins/custom-loaders" }, "node_modules/cytoscape": { "version": "3.30.1", @@ -18860,9 +18860,6 @@ "type": "github", "url": "https://github.com/sponsors/wooorm" } - }, - "plugins/custom-loaders": { - "version": "0.0.0" } } }