From 1735f1678eda4f0aad5a234d9810e750a320c483 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Mon, 7 Oct 2024 14:02:33 +1300 Subject: [PATCH 1/4] build: Update EKS from 29 to 30 TDE-1255 (#783) #### Motivation Use recent version of AWS EKS. #### Checklist - [ ] Tests updated (N/A) - [x] Docs updated - [x] Issue linked in Title --- docs/infrastructure/kubernetes.version.md | 5 +++- infra/README.md | 24 ++++++++--------- infra/charts/argo.extras.ts | 2 +- infra/charts/argo.workflows.ts | 2 +- infra/charts/cloudflared.ts | 2 +- infra/charts/event.exporter.ts | 4 +-- infra/charts/kube-system.coredns.ts | 2 +- infra/charts/kube-system.node.local.dns.ts | 2 +- infra/eks/cluster.ts | 8 +++--- package-lock.json | 30 +++++++++++----------- package.json | 4 +-- 11 files changed, 42 insertions(+), 43 deletions(-) diff --git a/docs/infrastructure/kubernetes.version.md b/docs/infrastructure/kubernetes.version.md index a76a3a995..3b5c61e72 100644 --- a/docs/infrastructure/kubernetes.version.md +++ b/docs/infrastructure/kubernetes.version.md @@ -59,7 +59,10 @@ Below is an example of upgrading from v1.27 to v1.28 4. Diff the stack to make sure that only versions are updated ```bash - npx cdk diff Workflows -c ci-role-arn=... + ci_role="$(aws iam list-roles | jq --raw-output '.Roles[] | select(.RoleName | contains("CiTopo")) | select(.RoleName | contains("-CiRole")).Arn')" + admin_role="arn:aws:iam::$(aws sts get-caller-identity --query Account --output text):role/AccountAdminRole" + workflow_maintainer_role="$(aws cloudformation describe-stacks --stack-name=TopographicSharedResourcesProd | jq --raw-output .Stacks[0].Outputs[0].OutputValue)" + npx cdk diff --context=maintainer-arns="${ci_role},${admin_role},${workflow_maintainer_role}" Workflows ``` The only changes should be Kubernetes version related. diff --git a/infra/README.md b/infra/README.md index 7f8f62c63..c9ee43427 100644 --- a/infra/README.md +++ b/infra/README.md @@ -30,28 +30,24 @@ Main entry point: [app](./cdk8s.ts) ```shell npm install ``` - - Login to AWS ### Deploy CDK -To deploy with AWS CDK a few configuration variables need to be set - -Due to VPC lookups a AWS account ID needs to be provided +To deploy with AWS CDK a few context values need to be set: -This can be done with either a `export CDK_DEFAULT_ACCOUNT=1234567890` or passed in at run time with `-c aws-account-id=1234567890` +- `aws-account-id`: Account ID to deploy into. This can be set with `export CDK_DEFAULT_ACCOUNT="$(aws sts get-caller-identity --query Account --output text)"`. +- `maintainer-arns`: Comma-separated list of AWS Role ARNs for the stack maintainers. -Then a deployment can be made with `cdk` +Then a deployment can be made with `cdk`: ```shell -npx cdk diff -c aws-account-id=1234567890 -c ci-role-arn=arn::... 
+ci_role="$(aws iam list-roles | jq --raw-output '.Roles[] | select(.RoleName | contains("CiTopo")) | select(.RoleName | contains("-CiRole")).Arn')" +admin_role="arn:aws:iam::$(aws sts get-caller-identity --query Account --output text):role/AccountAdminRole" +workflow_maintainer_role="$(aws cloudformation describe-stacks --stack-name=TopographicSharedResourcesProd | jq --raw-output .Stacks[0].Outputs[0].OutputValue)" +npx cdk deploy --context=maintainer-arns="${ci_role},${admin_role},${workflow_maintainer_role}" Workflows ``` -#### CDK Context - -- `aws-account-id`: Account ID to deploy into -- `ci-role-arn`: AWS Role ARN for the CI user - ### Deploy CDK8s Generate the kubernetes configuration yaml into `dist/` @@ -63,12 +59,12 @@ npx cdk8s synth Apply the generated yaml files ```shell -kubectl apply -f dist/ +kubectl apply --filename=dist/ ``` ### Testing -To debug use the following as `cdk8s syth` swallows the errors +To debug use the following as `cdk8s synth` swallows the errors ```shell npx tsx infra/cdk8s.ts diff --git a/infra/charts/argo.extras.ts b/infra/charts/argo.extras.ts index 23d59d3c9..74136fe78 100644 --- a/infra/charts/argo.extras.ts +++ b/infra/charts/argo.extras.ts @@ -1,5 +1,5 @@ import { Chart, ChartProps } from 'cdk8s'; -import * as kplus from 'cdk8s-plus-29'; +import * as kplus from 'cdk8s-plus-30'; import { Construct } from 'constructs'; import { applyDefaultLabels } from '../util/labels.js'; diff --git a/infra/charts/argo.workflows.ts b/infra/charts/argo.workflows.ts index 5aff0e457..8758297cf 100644 --- a/infra/charts/argo.workflows.ts +++ b/infra/charts/argo.workflows.ts @@ -1,5 +1,5 @@ import { Chart, ChartProps, Duration, Helm } from 'cdk8s'; -import { Secret } from 'cdk8s-plus-29'; +import { Secret } from 'cdk8s-plus-30'; import { Construct } from 'constructs'; import { ArgoDbName, ArgoDbUser, DefaultRegion } from '../constants.js'; diff --git a/infra/charts/cloudflared.ts b/infra/charts/cloudflared.ts index 5dd674d8c..96f3b3341 100644 --- a/infra/charts/cloudflared.ts +++ b/infra/charts/cloudflared.ts @@ -1,5 +1,5 @@ import { Chart, ChartProps, Size } from 'cdk8s'; -import * as kplus from 'cdk8s-plus-29'; +import * as kplus from 'cdk8s-plus-30'; import { Construct } from 'constructs'; import { applyDefaultLabels } from '../util/labels.js'; diff --git a/infra/charts/event.exporter.ts b/infra/charts/event.exporter.ts index 4bd0e61c8..e7f3a6961 100644 --- a/infra/charts/event.exporter.ts +++ b/infra/charts/event.exporter.ts @@ -8,7 +8,7 @@ import { Namespace, ServiceAccount, Volume, -} from 'cdk8s-plus-29'; +} from 'cdk8s-plus-30'; import { Construct } from 'constructs'; import { applyDefaultLabels } from '../util/labels.js'; @@ -28,7 +28,7 @@ export class EventExporter extends Chart { metadata: { name: 'event-exporter', namespace: props.namespace }, }); - // https://cdk8s.io/docs/latest/plus/cdk8s-plus-29/rbac/#role + // https://cdk8s.io/docs/latest/plus/cdk8s-plus-30/rbac/#role const clusterRole = new ClusterRole(this, 'event-exporter-cr', { metadata: { name: 'event-exporter' }, }); diff --git a/infra/charts/kube-system.coredns.ts b/infra/charts/kube-system.coredns.ts index e2a30ab87..7551a7551 100644 --- a/infra/charts/kube-system.coredns.ts +++ b/infra/charts/kube-system.coredns.ts @@ -1,5 +1,5 @@ import { Chart, ChartProps } from 'cdk8s'; -import * as kplus from 'cdk8s-plus-29'; +import * as kplus from 'cdk8s-plus-30'; import { Construct } from 'constructs'; import { applyDefaultLabels } from '../util/labels.js'; diff --git 
a/infra/charts/kube-system.node.local.dns.ts b/infra/charts/kube-system.node.local.dns.ts index 588723bf4..2cff82cb5 100644 --- a/infra/charts/kube-system.node.local.dns.ts +++ b/infra/charts/kube-system.node.local.dns.ts @@ -1,5 +1,5 @@ import { ApiObject, Chart, ChartProps, JsonPatch, Size } from 'cdk8s'; -import * as kplus from 'cdk8s-plus-29'; +import * as kplus from 'cdk8s-plus-30'; import { Construct } from 'constructs'; import { applyDefaultLabels } from '../util/labels.js'; diff --git a/infra/eks/cluster.ts b/infra/eks/cluster.ts index 52870f87b..ed0aa18b6 100644 --- a/infra/eks/cluster.ts +++ b/infra/eks/cluster.ts @@ -1,4 +1,4 @@ -import { KubectlV29Layer } from '@aws-cdk/lambda-layer-kubectl-v29'; +import { KubectlV30Layer } from '@aws-cdk/lambda-layer-kubectl-v30'; import { Aws, CfnOutput, Duration, RemovalPolicy, SecretValue, Size, Stack, StackProps } from 'aws-cdk-lib'; import * as chatbot from 'aws-cdk-lib/aws-chatbot'; import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; @@ -44,7 +44,7 @@ export class LinzEksCluster extends Stack { /* Cluster ID */ id: string; /** Version of EKS to use, this must be aligned to the `kubectlLayer` */ - version = KubernetesVersion.of('1.29'); + version = KubernetesVersion.of('1.30'); /** Argo needs a database for workflow archive */ argoDb: DatabaseInstance; /** Argo needs a temporary bucket to store objects */ @@ -72,7 +72,7 @@ export class LinzEksCluster extends Stack { defaultCapacity: 0, vpcSubnets: [{ subnetType: SubnetType.PRIVATE_WITH_EGRESS }], /** This must align to Cluster version: {@link version} */ - kubectlLayer: new KubectlV29Layer(this, 'KubeCtlLayer'), + kubectlLayer: new KubectlV30Layer(this, 'KubeCtlLayer'), /** To prevent IP exhaustion when running huge workflows run using ipv6 */ ipFamily: IpFamily.IP_V6, clusterLogging: [ClusterLoggingTypes.API, ClusterLoggingTypes.CONTROLLER_MANAGER, ClusterLoggingTypes.SCHEDULER], @@ -81,7 +81,7 @@ export class LinzEksCluster extends Stack { // TODO: setup up a database CNAME for changing Argo DB without updating Argo config // TODO: run a Disaster Recovery test to recover database data this.argoDb = new DatabaseInstance(this, ArgoDbInstanceName, { - engine: DatabaseInstanceEngine.postgres({ version: PostgresEngineVersion.VER_15_3 }), + engine: DatabaseInstanceEngine.postgres({ version: PostgresEngineVersion.VER_15_7 }), instanceType: InstanceType.of(InstanceClass.T3, InstanceSize.SMALL), vpc: this.vpc, databaseName: ArgoDbName, diff --git a/package-lock.json b/package-lock.json index 344b69775..1227eabae 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.2", "license": "MIT", "devDependencies": { - "@aws-cdk/lambda-layer-kubectl-v29": "^2.1.0", + "@aws-cdk/lambda-layer-kubectl-v30": "^2.0.1", "@aws-sdk/client-cloudformation": "3.658.1", "@aws-sdk/client-eks": "3.658.1", "@aws-sdk/client-ssm": "3.658.1", @@ -19,7 +19,7 @@ "aws-cdk-lib": "2.160.x", "cdk8s": "^2.69.5", "cdk8s-cli": "^2.198.228", - "cdk8s-plus-29": "^2.5.5", + "cdk8s-plus-30": "^2.2.5", "constructs": "^10.3.0", "tsx": "^4.6.2" }, @@ -89,14 +89,14 @@ "node": ">=10" } }, - "node_modules/@aws-cdk/lambda-layer-kubectl-v29": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@aws-cdk/lambda-layer-kubectl-v29/-/lambda-layer-kubectl-v29-2.1.0.tgz", - "integrity": "sha512-YwSyM3eNK5DiEY+5HWzVmkLzEMFSyTpsBSqki/kNePwH+UXP//Nmee2vMEIYxNFW6tpN3dx+B4gYNiJhBwGhKQ==", + "node_modules/@aws-cdk/lambda-layer-kubectl-v30": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/@aws-cdk/lambda-layer-kubectl-v30/-/lambda-layer-kubectl-v30-2.0.1.tgz", + "integrity": "sha512-R4N2OTq9jCxARAmrp2TBNRkVreVa01wgAC4GNRRfZ8C4UD5+Cz+vylIyyJsVPD7WWZpdBSWDidnVMpvwTpAsQQ==", "dev": true, "license": "Apache-2.0", "peerDependencies": { - "aws-cdk-lib": "^2.94.0", + "aws-cdk-lib": "^2.85.0", "constructs": "^10.0.5" } }, @@ -3194,10 +3194,10 @@ "concat-map": "0.0.1" } }, - "node_modules/cdk8s-plus-29": { - "version": "2.5.5", - "resolved": "https://registry.npmjs.org/cdk8s-plus-29/-/cdk8s-plus-29-2.5.5.tgz", - "integrity": "sha512-/id21yLH/xaa53vnun3ucp3bhiZ1Gp8zbWW2guc4gbY2RibKjrAhwFtnFXkleWB/KLnW/CzxAWeqsRp+08g6rw==", + "node_modules/cdk8s-plus-30": { + "version": "2.2.5", + "resolved": "https://registry.npmjs.org/cdk8s-plus-30/-/cdk8s-plus-30-2.2.5.tgz", + "integrity": "sha512-uAraj0dBF+1pVSj5CCu4NvJnQvgjBGtzkmPrvyb6V4mIclSdsfh+PRcm3WgaX8g16k6YE6pMQnuc3PAQYVXCyA==", "bundleDependencies": [ "minimatch" ], @@ -3214,19 +3214,19 @@ "constructs": "^10.3.0" } }, - "node_modules/cdk8s-plus-29/node_modules/balanced-match": { + "node_modules/cdk8s-plus-30/node_modules/balanced-match": { "version": "1.0.2", "dev": true, "inBundle": true, "license": "MIT" }, - "node_modules/cdk8s-plus-29/node_modules/concat-map": { + "node_modules/cdk8s-plus-30/node_modules/concat-map": { "version": "0.0.1", "dev": true, "inBundle": true, "license": "MIT" }, - "node_modules/cdk8s-plus-29/node_modules/minimatch": { + "node_modules/cdk8s-plus-30/node_modules/minimatch": { "version": "3.1.2", "dev": true, "inBundle": true, @@ -3238,7 +3238,7 @@ "node": "*" } }, - "node_modules/cdk8s-plus-29/node_modules/minimatch/node_modules/brace-expansion": { + "node_modules/cdk8s-plus-30/node_modules/minimatch/node_modules/brace-expansion": { "version": "1.1.11", "dev": true, "inBundle": true, diff --git a/package.json b/package.json index 967edcc72..f9b9be51a 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "test": "node --import tsx --test infra/**/*.test.ts templates/common/__test__/*.test.ts" }, "devDependencies": { - "@aws-cdk/lambda-layer-kubectl-v29": "^2.1.0", + "@aws-cdk/lambda-layer-kubectl-v30": "^2.0.1", "@aws-sdk/client-cloudformation": "3.658.1", "@aws-sdk/client-eks": "3.658.1", "@aws-sdk/client-ssm": "3.658.1", @@ -33,7 +33,7 @@ "aws-cdk-lib": "2.160.x", "cdk8s": "^2.69.5", "cdk8s-cli": "^2.198.228", - "cdk8s-plus-29": "^2.5.5", + "cdk8s-plus-30": "^2.2.5", "constructs": "^10.3.0", "tsx": "^4.6.2" } From fc300eec0531f9f2af5e731757bd510b6e002b36 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Tue, 8 Oct 2024 09:08:12 +1300 Subject: [PATCH 2/4] docs: Explain how to destroy an installation TDE-1276 (#795) #### Motivation Add and clarify documentation. 
#### Checklist - [ ] Tests updated - [x] Docs updated - [x] Issue linked in Title --- .github/dependabot.yml | 62 ++++++++++----------- README.md | 1 - docs/infrastructure/components/karpenter.md | 2 +- docs/infrastructure/destroy.md | 25 +++++++++ docs/infrastructure/helm.md | 2 +- docs/infrastructure/initial.deployment.md | 3 +- docs/infrastructure/kubernetes.version.md | 24 ++++---- docs/labels.md | 22 ++++---- infra/README.md | 1 + templates/argo-tasks/stac-validate.yml | 14 ++--- 10 files changed, 93 insertions(+), 63 deletions(-) create mode 100644 docs/infrastructure/destroy.md diff --git a/.github/dependabot.yml b/.github/dependabot.yml index fd402df3e..aef7fdd52 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,33 +1,33 @@ version: 2 updates: -- package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: daily -- package-ecosystem: "docker" - directory: "/.github/workflows" - schedule: - interval: daily -- package-ecosystem: npm - directory: "/" - schedule: - interval: daily - open-pull-requests-limit: 10 - groups: - aws-sdk: - patterns: - - "@aws-sdk/*" - aws-cdk: - patterns: - - "@aws-cdk/*" - - "aws-cdk" - - "aws-cdk-lib" - - "cdk8s" - - "cdk8s-cli" - - "cdk8s-plus-*" - - "constructs" - ignore: - - dependency-name: "@aws-sdk/*" - update-types: ["version-update:semver-patch"] - - dependency-name: "@types/node" - update-types: ["version-update:semver-patch"] + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: daily + - package-ecosystem: 'docker' + directory: '/.github/workflows' + schedule: + interval: daily + - package-ecosystem: npm + directory: '/' + schedule: + interval: daily + open-pull-requests-limit: 10 + groups: + aws-sdk: + patterns: + - '@aws-sdk/*' + aws-cdk: + patterns: + - '@aws-cdk/*' + - 'aws-cdk' + - 'aws-cdk-lib' + - 'cdk8s' + - 'cdk8s-cli' + - 'cdk8s-plus-*' + - 'constructs' + ignore: + - dependency-name: '@aws-sdk/*' + update-types: ['version-update:semver-patch'] + - dependency-name: '@types/node' + update-types: ['version-update:semver-patch'] diff --git a/README.md b/README.md index 49fdfdd26..c0ab12387 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,6 @@ To connect to the EKS cluster you need to be [logged into AWS](https://toitutewh Then to setup the cluster, only the first time using the cluster you need to run this - ```bash aws --region=ap-southeast-2 eks update-kubeconfig --name=Workflows ``` diff --git a/docs/infrastructure/components/karpenter.md b/docs/infrastructure/components/karpenter.md index f0f6647ce..bc93aaeed 100644 --- a/docs/infrastructure/components/karpenter.md +++ b/docs/infrastructure/components/karpenter.md @@ -1 +1 @@ -# Karpenter \ No newline at end of file +# Karpenter diff --git a/docs/infrastructure/destroy.md b/docs/infrastructure/destroy.md new file mode 100644 index 000000000..5f3099d15 --- /dev/null +++ b/docs/infrastructure/destroy.md @@ -0,0 +1,25 @@ +# How to destroy an installation + +Destroying the cluster and stack is not easy, because we use some custom EKS resources to link the two together. Based on a teardown, at time of writing the following sequence should work: + +1. Delete the cluster: + + ```bash + aws eks delete-cluster --name=Workflows + aws eks wait cluster-deleted --name=Workflows + ``` + +1. Attempt to delete the stack: + + ```bash + aws cloudformation delete-stack --stack-name=Workflows + aws cloudformation wait stack-delete-complete --stack-name=Workflows + ``` + +1. Wait for the above to fail. +1. 
Go to the [stack in AWS console](https://ap-southeast-2.console.aws.amazon.com/cloudformation/home?region=ap-southeast-2#/stacks/?filteringText=Workflows&filteringStatus=active&viewNested=true)
+1. Delete the stack, retaining all the resources which could not be deleted
+
+We don't use the CLI for the last step because the logical IDs of the resources which could not be deleted do not seem to match those of the resources which need to be retained. The cause is uncertain, but for now deleting in the console is safer.
+
+[How do I troubleshoot custom resource failures in AWS CloudFormation?](https://repost.aws/knowledge-center/cfn-troubleshoot-custom-resource-failures) might be relevant for future issues like this.
diff --git a/docs/infrastructure/helm.md b/docs/infrastructure/helm.md
index 11c0dae50..a7d1ce1b4 100644
--- a/docs/infrastructure/helm.md
+++ b/docs/infrastructure/helm.md
@@ -12,4 +12,4 @@ However, some of the component Helm charts do not have a `values.schema.json`. A

 - [aws-for-fluent-bit](./components/fluentbit.md) ()
 - [Karpenter](./components/karpenter.md)
-- [Argo workflows](./components/argo.workflows.md)
\ No newline at end of file
+- [Argo workflows](./components/argo.workflows.md)
diff --git a/docs/infrastructure/initial.deployment.md b/docs/infrastructure/initial.deployment.md
index 33fec4a2d..6d13a61bb 100644
--- a/docs/infrastructure/initial.deployment.md
+++ b/docs/infrastructure/initial.deployment.md
@@ -12,5 +12,6 @@ The first time a cluster is deployed Custom Resource Definitions (CRD) will not
 This means that any resources that require a CRD will fail to deploy with an error similar to

 > resource mapping not found for name: "karpenter-template" namespace: "" from "dist/0003-karpenter-provisioner.k8s.yaml": no matches for kind "AWSNodeTemplate" in version "karpenter.k8s.aws/v1alpha1"
+> ensure CRDs are installed first

-To work around this problem the first deployment can be repeated, as the CRDs are deployed early in the deployment process.
+To work around this problem, re-run the `kubectl apply` command.
diff --git a/docs/infrastructure/kubernetes.version.md b/docs/infrastructure/kubernetes.version.md
index 3b5c61e72..fc350dda0 100644
--- a/docs/infrastructure/kubernetes.version.md
+++ b/docs/infrastructure/kubernetes.version.md
@@ -17,6 +17,7 @@ If there is a version matching to the Kubernetes version to upgrade to, upgrade
    ```bash
    npm install --save-dev cdk8s-plus-27
    ```
+
 2. Remove the previous version

    ```bash
@@ -34,12 +35,13 @@ Below is an example of upgrading from v1.27 to v1.28

    ```bash
    npm install --save-dev @aws-cdk/lambda-layer-kubectl-v28
    ```
-   
+
    While also removing the old lambda-layer version
-   
+
    ```bash
    npm rm @aws-cdk/lambda-layer-kubectl-v27
    ```
+
 2. Set the new Kubernetes version in `LinzEksCluster`

    ```typescript
@@ -50,9 +52,9 @@ Below is an example of upgrading from v1.27 to v1.28

    ```typescript
    import { KubectlV28Layer } from '@aws-cdk/lambda-layer-kubectl-v28';
-   
+
    // ...
-   
+
    kubectlLayer: new KubectlV28Layer(this, 'KubeCtlLayer'),
    ```

@@ -64,9 +66,9 @@ Below is an example of upgrading from v1.27 to v1.28
    workflow_maintainer_role="$(aws cloudformation describe-stacks --stack-name=TopographicSharedResourcesProd | jq --raw-output .Stacks[0].Outputs[0].OutputValue)"
    npx cdk diff --context=maintainer-arns="${ci_role},${admin_role},${workflow_maintainer_role}" Workflows
    ```
-   
+
    The only changes should be Kubernetes version related. 
- + ``` Resources [~] AWS::Lambda::LayerVersion KubeCtlLayer KubeCtlLayer replace @@ -95,8 +97,9 @@ Below is an example of upgrading from v1.27 to v1.28 ## Cycle out EC2 Nodes to the new version + > **Are Amazon EKS managed node groups automatically updated along with the cluster control plane version?** -No. A managed node group creates Amazon EC2 instances in your account. These instances aren't automatically upgraded when you or Amazon EKS update your control plane. For more information, see Updating a managed node group. We recommend maintaining the same Kubernetes version on your control plane and nodes. +> No. A managed node group creates Amazon EC2 instances in your account. These instances aren't automatically upgraded when you or Amazon EKS update your control plane. For more information, see Updating a managed node group. We recommend maintaining the same Kubernetes version on your control plane and nodes. This process is necessary to avoid being blocked for a future Kubernetes version upgrade. For example, if Kubernetes get upgraded from `1.27` to `1.28` and the nodes remain in `1.27`, the next time Kubernetes will be upgraded to `1.29`, the upgrade will fail. @@ -105,10 +108,11 @@ This process is necessary to avoid being blocked for a future Kubernetes version ```bash node_group_name="$(aws eks list-nodegroups --cluster-name=Workflows | jq --raw-output '.nodegroups[]')" ``` + 2. Describe the nodegroup to validate the versions By describing the node group you can check the current version, or you can use `k get nodes` to see what version is currently running - + ```bash aws eks describe-nodegroup --cluster-name=Workflows --nodegroup-name="$node_group_name" | jq --raw-output .nodegroup.version ``` @@ -118,9 +122,9 @@ This process is necessary to avoid being blocked for a future Kubernetes version ```bash aws eks update-nodegroup-version --cluster-name=Workflows --nodegroup-name="$node_group_name" ``` - + This step takes some time to run. 
You can wait for it to finish with this command: - + ```bash aws eks wait nodegroup-active --cluster-name=Workflows --nodegroup-name="$node_group_name" ``` diff --git a/docs/labels.md b/docs/labels.md index a43ba2069..9b406b1f4 100644 --- a/docs/labels.md +++ b/docs/labels.md @@ -8,11 +8,11 @@ The following list of labels should be used in conjunction with Kubernetes [well ## Workflows -| Label | Description | Examples | -| --------------------- | ---------------------------------------- |--------------------------------------| -| `linz.govt.nz/ticket` | JIRA Ticket number | `TDE-912`, `BM-37` | -| `linz.govt.nz/region` | Geographic region that object relates to | "wellington", "auckland" | -| `linz.govt.nz/category` | The LINZ group that owns the workflow | "basemaps", "raster", "test", "util" | +| Label | Description | Examples | +| ----------------------- | ---------------------------------------- | ------------------------------------ | +| `linz.govt.nz/ticket` | JIRA Ticket number | `TDE-912`, `BM-37` | +| `linz.govt.nz/region` | Geographic region that object relates to | "wellington", "auckland" | +| `linz.govt.nz/category` | The LINZ group that owns the workflow | "basemaps", "raster", "test", "util" | For the type of data that is being processed @@ -25,12 +25,12 @@ For the type of data that is being processed Most other objects deployed via AWS-CDK and CDK8s should also include information about the CICD process that deployed it -| Label | Description | Examples | -| -------------------------- | ---------------------------------------- | ------------------------------------------ | -| `linz.govt.nz/git-hash` | git hash that deployed the object | "bb3dab2779922094d2b8ecd4c67f30c66b38613d" | -| `linz.govt.nz/git-version` | git version information | "v6.46.0", "v0.0.1-20-gbb3dab27" | -| `linz.govt.nz/git-repository` | git repository that the object came from | "linz\_\_topo-workflows" | -| `linz.govt.nz/build-id` | Unique ID of the build that deployed | "6806791032-1" | +| Label | Description | Examples | +| ----------------------------- | ---------------------------------------- | ------------------------------------------ | +| `linz.govt.nz/git-hash` | git hash that deployed the object | "bb3dab2779922094d2b8ecd4c67f30c66b38613d" | +| `linz.govt.nz/git-version` | git version information | "v6.46.0", "v0.0.1-20-gbb3dab27" | +| `linz.govt.nz/git-repository` | git repository that the object came from | "linz\_\_topo-workflows" | +| `linz.govt.nz/build-id` | Unique ID of the build that deployed | "6806791032-1" | ## Label Usage diff --git a/infra/README.md b/infra/README.md index c9ee43427..8072269eb 100644 --- a/infra/README.md +++ b/infra/README.md @@ -30,6 +30,7 @@ Main entry point: [app](./cdk8s.ts) ```shell npm install ``` + - Login to AWS ### Deploy CDK diff --git a/templates/argo-tasks/stac-validate.yml b/templates/argo-tasks/stac-validate.yml index bc6a87710..838535b7b 100644 --- a/templates/argo-tasks/stac-validate.yml +++ b/templates/argo-tasks/stac-validate.yml @@ -51,10 +51,10 @@ spec: - name: AWS_ROLE_CONFIG_PATH value: s3://linz-bucket-config/config.json args: - - 'stac' - - 'validate' - - '--concurrency={{inputs.parameters.concurrency}}' - - '--recursive={{inputs.parameters.recursive}}' - - '--checksum-assets={{inputs.parameters.checksum_assets}}' - - '--checksum-links={{inputs.parameters.checksum_links}}' - - '{{inputs.parameters.uri}}' + - 'stac' + - 'validate' + - '--concurrency={{inputs.parameters.concurrency}}' + - '--recursive={{inputs.parameters.recursive}}' + - 
'--checksum-assets={{inputs.parameters.checksum_assets}}' + - '--checksum-links={{inputs.parameters.checksum_links}}' + - '{{inputs.parameters.uri}}' From 3ee01d21db8e0d86d328bc33693ffbd0869f50e6 Mon Sep 17 00:00:00 2001 From: paulfouquet <86932794+paulfouquet@users.noreply.github.com> Date: Tue, 8 Oct 2024 09:36:36 +1300 Subject: [PATCH 3/4] test: use yaml parser to extract wf script TDE-1280 (#796) #### Motivation Using a yaml parser is safer than splitting on an expected string in the Workflow file. #### Modification - integrate https://github.com/eemeli/yaml library - modify the way to extract a script from a task within a workflow file #### Checklist - [x] Tests updated - [x] Docs updated - [x] Issue linked in Title --- package-lock.json | 3 +- package.json | 3 +- .../common/__test__/exit.handler.test.ts | 36 +++++++++++++++---- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1227eabae..9fdbbee74 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,7 +21,8 @@ "cdk8s-cli": "^2.198.228", "cdk8s-plus-30": "^2.2.5", "constructs": "^10.3.0", - "tsx": "^4.6.2" + "tsx": "^4.6.2", + "yaml": "^2.5.1" }, "engines": { "node": "^20.13.1" diff --git a/package.json b/package.json index f9b9be51a..c48686b24 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "cdk8s-cli": "^2.198.228", "cdk8s-plus-30": "^2.2.5", "constructs": "^10.3.0", - "tsx": "^4.6.2" + "tsx": "^4.6.2", + "yaml": "^2.5.1" } } diff --git a/templates/common/__test__/exit.handler.test.ts b/templates/common/__test__/exit.handler.test.ts index 4bba277c2..ab021b36f 100644 --- a/templates/common/__test__/exit.handler.test.ts +++ b/templates/common/__test__/exit.handler.test.ts @@ -2,6 +2,30 @@ import assert from 'node:assert'; import fs from 'node:fs'; import { describe, it } from 'node:test'; +import YAML from 'yaml'; + +/** + * Workflow template type (partial) + */ +type WorkflowTemplate = { spec: { templates: { name: string; script: { source: string } }[] } }; + +/** + * Extract the script of the task named `taskName` from the `workflowTemplate`. + * + * @param workflow Workflow template + * @param taskName the task to extract the script from + * @returns the script of the task + */ +function getScript(workflow: WorkflowTemplate, taskName: string): string { + const template = workflow?.spec?.templates?.find((f) => f.name === taskName); + if (template == null) throw new Error(`Task ${taskName} not found in the workflow`); + + const source = template.script?.source; + if (source == null) throw new Error(`Task ${taskName} has no script`); + + return source; +} + /** * Read the workflow YAML file and create a function from the script inside. * replacing {{ inputs.* }} with ctx @@ -9,12 +33,12 @@ import { describe, it } from 'node:test'; * @param ctx */ function runTestFunction(ctx: { workflowParameters: string; workflowStatus: string }): void { - const func = fs.readFileSync('./templates/common/exit.handler.yml', 'utf-8').split('source: |')[1]; - if (!func) { - throw new Error('No script found in the workflow'); - } - const newFunc = func - // Replace inputs + const wfRaw = fs.readFileSync('./templates/common/exit.handler.yml', 'utf-8'); + const wfTemplate = YAML.parse(wfRaw) as WorkflowTemplate; + const script = getScript(wfTemplate, 'main'); + + // Replace inputs with ctx + const newFunc = script .replace('{{= inputs.parameters.workflow_parameters }}', `${ctx.workflowParameters ?? 
'[]'}`)
      .replace('{{inputs.parameters.workflow_status}}', `${ctx.workflowStatus ?? 'Failed'}`);
    // eslint-disable-next-line @typescript-eslint/no-implied-eval

From 9584b691df5a2c489b2a599b72e9ceb996079b46 Mon Sep 17 00:00:00 2001
From: paulfouquet <86932794+paulfouquet@users.noreply.github.com>
Date: Tue, 8 Oct 2024 09:57:19 +1300
Subject: [PATCH 4/4] feat: --no_date_in_survey_path handling TDE-1261 (#759)

#### Motivation

Allow the generate-path template to use the new flag introduced in
https://github.com/linz/argo-tasks/pull/1067.

#### Modification

Add a new parameter to handle the flag.

#### Checklist

_If not applicable, provide explanation of why._

- [ ] Tests updated
- [ ] Docs updated
- [ ] Issue linked in Title

---------

Co-authored-by: Victor Engmark
---
 templates/argo-tasks/README.md         | 2 ++
 templates/argo-tasks/generate-path.yml | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/templates/argo-tasks/README.md b/templates/argo-tasks/README.md
index 27234f3de..f33a51a16 100644
--- a/templates/argo-tasks/README.md
+++ b/templates/argo-tasks/README.md
@@ -193,6 +193,8 @@ arguments:
     value: '{{workflow.parameters.version_argo_tasks}}'
   - name: target_bucket_name
     value: '{{inputs.parameters.target_bucket_name}}'
+  - name: no_date_in_survey_path
+    value: '{{inputs.parameters.no_date_in_survey_path}}'
   - name: source
     value: '{{inputs.parameters.source}}'
 ```
diff --git a/templates/argo-tasks/generate-path.yml b/templates/argo-tasks/generate-path.yml
index 91732efd4..c6ed313a7 100644
--- a/templates/argo-tasks/generate-path.yml
+++ b/templates/argo-tasks/generate-path.yml
@@ -19,6 +19,9 @@ spec:
       description: s3 path of source data
     - name: target_bucket_name
       description: target bucket name e.g. 'nz-imagery'
+    - name: no_date_in_survey_path
+      description: 'If the survey path should not contain the date'
+      default: 'false'
     - name: version
       description: argo-task Container version to use
       default: 'v4'
@@ -33,6 +36,7 @@ spec:
      - 'generate-path'
      - '--target-bucket-name'
      - '{{=sprig.trim(inputs.parameters.target_bucket_name)}}'
+      - '--no-date-in-survey-path={{=sprig.trim(inputs.parameters.no_date_in_survey_path)}}'
      - '{{=sprig.trim(inputs.parameters.source)}}'
 outputs:
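
For reviewers who want to see the new parameter in context, below is a minimal sketch of a workflow step calling this template. The `templateRef` name, entrypoint, and example values are illustrative assumptions, not taken from this patch series; only the `no_date_in_survey_path` parameter, its `'false'` default, and the `'nz-imagery'` bucket example come from the patch.

```yaml
# Hypothetical caller step. templateRef name/template and the source path
# are assumptions for illustration; check templates/argo-tasks/README.md
# for the actual names used in this repository.
- name: generate-path
  templateRef:
    name: tpl-at-generate-path # assumed WorkflowTemplate name
    template: main # assumed entrypoint template
  arguments:
    parameters:
      - name: target_bucket_name
        value: 'nz-imagery'
      # New in this patch: omit the date from the generated survey path.
      - name: no_date_in_survey_path
        value: 'true'
      - name: source
        value: 's3://example-staging-bucket/survey-sample/' # placeholder path
```

Because the flag defaults to `'false'`, existing callers that never set it keep generating dated survey paths unchanged.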