Skip to content

Commit b30cb4e

Browse files
authored
SPLAT-2171: Added AWS dedicated host scripts and job (#70274)
* Added provision/deprovision logic for dedicated hosts * Added AWS dedicated hosts job
1 parent 45ac318 commit b30cb4e

File tree

8 files changed

+187
-0
lines changed

8 files changed

+187
-0
lines changed

ci-operator/config/openshift/release/openshift-release-master__nightly-4.21.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,25 @@ tests:
833833
FEATURE_SET: TechPreviewNoUpgrade
834834
workflow: openshift-e2e-vsphere
835835
timeout: 6h0m0s
836+
- as: e2e-aws-ovn-dedicated
837+
cron: '@yearly'
838+
steps:
839+
cluster_profile: aws
840+
env:
841+
DEDICATED_HOST: "yes"
842+
leases:
843+
- env: LEASED_RESOURCE
844+
resource_type: aws-edge-zones-quota-slice
845+
observers:
846+
enable:
847+
- observers-resource-watch
848+
post:
849+
- chain: gather-network
850+
- chain: gather-core-dump
851+
- chain: ipi-deprovision
852+
- ref: ipi-deprovision-aws-dedicated-hosts
853+
workflow: openshift-e2e-aws-ovn
854+
timeout: 6h0m0s
836855
- as: e2e-aws-ovn-fips
837856
interval: 168h
838857
steps:

ci-operator/jobs/openshift/release/openshift-release-master-periodics.yaml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162722,6 +162722,83 @@ periodics:
162722162722
- name: result-aggregator
162723162723
secret:
162724162724
secretName: result-aggregator
162725+
- agent: kubernetes
162726+
cluster: build11
162727+
cron: '@yearly'
162728+
decorate: true
162729+
decoration_config:
162730+
skip_cloning: true
162731+
timeout: 6h0m0s
162732+
extra_refs:
162733+
- base_ref: master
162734+
org: openshift
162735+
repo: release
162736+
labels:
162737+
ci-operator.openshift.io/cloud: aws
162738+
ci-operator.openshift.io/cloud-cluster-profile: aws
162739+
ci-operator.openshift.io/variant: nightly-4.21
162740+
ci.openshift.io/generator: prowgen
162741+
ci.openshift.io/no-builds: "true"
162742+
job-release: "4.21"
162743+
pj-rehearse.openshift.io/can-be-rehearsed: "true"
162744+
name: periodic-ci-openshift-release-master-nightly-4.21-e2e-aws-ovn-dedicated
162745+
spec:
162746+
containers:
162747+
- args:
162748+
- --gcs-upload-secret=/secrets/gcs/service-account.json
162749+
- --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
162750+
- --lease-server-credentials-file=/etc/boskos/credentials
162751+
- --report-credentials-file=/etc/report/credentials
162752+
- --secret-dir=/secrets/ci-pull-credentials
162753+
- --target=e2e-aws-ovn-dedicated
162754+
- --variant=nightly-4.21
162755+
command:
162756+
- ci-operator
162757+
image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
162758+
imagePullPolicy: Always
162759+
name: ""
162760+
resources:
162761+
requests:
162762+
cpu: 10m
162763+
volumeMounts:
162764+
- mountPath: /etc/boskos
162765+
name: boskos
162766+
readOnly: true
162767+
- mountPath: /secrets/ci-pull-credentials
162768+
name: ci-pull-credentials
162769+
readOnly: true
162770+
- mountPath: /secrets/gcs
162771+
name: gcs-credentials
162772+
readOnly: true
162773+
- mountPath: /secrets/manifest-tool
162774+
name: manifest-tool-local-pusher
162775+
readOnly: true
162776+
- mountPath: /etc/pull-secret
162777+
name: pull-secret
162778+
readOnly: true
162779+
- mountPath: /etc/report
162780+
name: result-aggregator
162781+
readOnly: true
162782+
serviceAccountName: ci-operator
162783+
volumes:
162784+
- name: boskos
162785+
secret:
162786+
items:
162787+
- key: credentials
162788+
path: credentials
162789+
secretName: boskos-credentials
162790+
- name: ci-pull-credentials
162791+
secret:
162792+
secretName: ci-pull-credentials
162793+
- name: manifest-tool-local-pusher
162794+
secret:
162795+
secretName: manifest-tool-local-pusher
162796+
- name: pull-secret
162797+
secret:
162798+
secretName: registry-pull-credentials
162799+
- name: result-aggregator
162800+
secret:
162801+
secretName: result-aggregator
162725162802
- agent: kubernetes
162726162803
cluster: build11
162727162804
cron: '@weekly'

ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-commands.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,3 +386,43 @@ platform:
386386
EOF
387387
yq-go m -a -x -i "${CONFIG}" "${patch_user_provisioned_dns}"
388388
fi
389+
390+
# Add config for dedicated hosts to compute nodes if job is configured.
#
# When DEDICATED_HOST is "yes", one EC2 dedicated host is allocated in every
# zone listed for the "worker" compute pool in ${CONFIG}. Each allocated host is
# recorded (id + zone) in a patch file under ${SHARED_DIR}, which is then merged
# into ${CONFIG} and copied to ${ARTIFACT_DIR}.
#
# Reads: DEDICATED_HOST, SHARED_DIR, ARTIFACT_DIR, CONFIG, COMPUTE_NODE_TYPE,
#        aws_source_region, JOB_NAME_SAFE, JOB_NAME, BUILD_ID.
#        (CONFIG, COMPUTE_NODE_TYPE and aws_source_region are expected to be set
#        earlier in this script.)
# Exits non-zero when no worker zones are configured or a host cannot be
# allocated.
if [[ "${DEDICATED_HOST:-no}" == "yes" ]]; then
  echo "Detected dedicated host configured. Starting install-config patching."
  patch_dedicated_host="${SHARED_DIR}/install-config-dedicated-host.yaml.patch"

  # Create Host for each zone. If no zones configured, error out. Zones can exist
  # before script execution so we'll pull zone listing out for workers.
  WORKER_ZONES=$(yq-v4 '.compute[] | select(.name == "worker") | .platform.aws.zones[]' "${CONFIG}")
  if [[ -z "${WORKER_ZONES}" || "${WORKER_ZONES}" == "null" ]]; then
    echo "No zones configured, Unable to determine where to create dedicated hosts." >&2
    # A bare `exit` would return the status of the echo above (0) and let the
    # step pass without any dedicated host having been configured.
    exit 1
  fi

  # Seed the patch with an empty host list; hosts are appended one by one below
  # so that the file always reflects what has actually been allocated so far.
  # NOTE(review): key nesting reconstructed from the yq path
  # `.platform.aws.dedicatedHosts.hosts` used below - confirm against the
  # install-config schema.
  cat > "${patch_dedicated_host}" << EOF
compute:
- name: worker
  platform:
    aws:
      dedicatedHosts:
        hostAffinity: Host
        hosts: []
EOF

  # Instance family of the compute node type (text before the first '.').
  # Loop-invariant, so it is computed once.
  HOST_TYPE="${COMPUTE_NODE_TYPE%%.*}"

  # Word splitting is intentional: yq prints one zone name per line.
  # shellcheck disable=SC2086
  for zone in ${WORKER_ZONES}; do
    echo "Creating dedicated host. Region='${aws_source_region}' Zone='${zone}' InstanceFamily='${HOST_TYPE}'"

    EXPIRATION_DATE=$(date -d '6 hours' --iso-8601=minutes --utc)

    # Build the tag specification with jq so every value is JSON-escaped.
    HOST_SPECS=$(jq -cn \
      --arg name "${JOB_NAME_SAFE}-${zone}" \
      --arg job "${JOB_NAME_SAFE}" \
      --arg expiration "${EXPIRATION_DATE}" \
      --arg build "${BUILD_ID}_${JOB_NAME}" \
      '{ResourceType: "dedicated-host", Tags: [
         {Key: "Name", Value: $name},
         {Key: "CI-JOB", Value: $job},
         {Key: "expirationDate", Value: $expiration},
         {Key: "ci-build-info", Value: $build}
       ]}')

    # `// empty` turns a missing HostIds[0] into an empty string instead of the
    # literal "null", so the check below catches failed allocations.
    HOST_ID=$(aws ec2 allocate-hosts \
      --instance-type "${HOST_TYPE}.4xlarge" \
      --auto-placement 'off' \
      --host-recovery 'off' \
      --tag-specifications "${HOST_SPECS}" \
      --host-maintenance 'off' \
      --quantity '1' \
      --availability-zone "${zone}" \
      --region "${aws_source_region}" \
      | jq -r '.HostIds[0] // empty')
    if [[ -z "${HOST_ID}" ]]; then
      echo "Failed to allocate a dedicated host in zone '${zone}'." >&2
      exit 1
    fi

    # We need to pass in the vars since yq doesn't see the loop variables
    ZONE_NAME="${zone}" HOST_ID="${HOST_ID}" yq-v4 -i '.compute[] |= (select(.name == "worker") | .platform.aws.dedicatedHosts.hosts += [ { "id": strenv(HOST_ID), "zone": strenv(ZONE_NAME) } ])' "${patch_dedicated_host}"
  done

  # Update config with host ID
  echo "Patching install-config.yaml for dedicated hosts."
  yq-go m -x -i "${CONFIG}" "${patch_dedicated_host}"
  cp "${patch_dedicated_host}" "${ARTIFACT_DIR}/"
fi

ci-operator/step-registry/ipi/conf/aws/ipi-conf-aws-ref.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,5 +106,10 @@ ref:
106106
documentation: |-
107107
Experimental feature allowing jobs to use NAT instances instead of NAT gateways, in certain accounts, for cost
108108
reduction purposes.
109+
- name: DEDICATED_HOST
110+
default: "no"
111+
documentation: |-
112+
Allows users to enable configuration of dedicated hosts for compute nodes. Valid options are "yes" and "no". When "yes", the
113+
configuration will create a dedicated host for each zone the "worker" compute pool has configured.
109114
documentation: |-
110115
The IPI AWS configure step generates the AWS-specific install-config.yaml contents based on the cluster profile and optional input files.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../OWNERS
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
#
# Releases the EC2 dedicated hosts recorded in the dedicated-host install-config
# patch file found in ${SHARED_DIR}.
#
# Reads: CLUSTER_PROFILE_DIR (AWS credentials file), SHARED_DIR
#        (install-config.yaml and the dedicated-host patch file).
# Exit status: 0 when there is nothing to release or every host was released,
#              non-zero when the region cannot be determined or any host could
#              not be released.
set -o nounset
set -o errexit
set -o pipefail

export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
CONFIG="${SHARED_DIR}/install-config.yaml"
patch_dedicated_host="${SHARED_DIR}/install-config-dedicated-host.yaml.patch"

if [[ ! -f "${patch_dedicated_host}" ]]; then
  echo "No dedicated hosts patch file found, so assuming patch never occurred."
  exit 0
fi

echo "Deprovisioning dedicated hosts..."

# We get the region information from the install-config.yaml. For the dedicated hosts, we are pulling from the patch file in
# the event that an error occurred during creation of the dedicated host.
REGION=$(yq-v4 -r '.platform.aws.region' "${CONFIG}") || REGION=""
if [[ -z "${REGION}" || "${REGION}" == "null" ]]; then
  echo "Unable to determine the AWS region from ${CONFIG}." >&2
  exit 1
fi

HOST_IDS=$(yq-v4 -r '.compute[] | select(.name == "worker") | .platform.aws.dedicatedHosts.hosts[] | .id' "${patch_dedicated_host}")

# Keep going after a failure so that one bad host does not leak all the others;
# the overall result is reported through the exit status at the end.
failed=0
# Word splitting is intentional: yq prints one host ID per line.
# shellcheck disable=SC2086
for HOST in ${HOST_IDS}; do
  # Skip entries that were recorded without a usable ID.
  if [[ "${HOST}" == "null" ]]; then
    continue
  fi

  echo "Release host ${HOST}"
  # release-hosts can exit 0 while listing the host under "Unsuccessful", so
  # both the exit status and that list are inspected.
  if ! unsuccessful=$(aws ec2 release-hosts --region "${REGION}" --host-ids "${HOST}" \
    --query 'Unsuccessful[].ResourceId' --output text); then
    echo "Failed to release host ${HOST}." >&2
    failed=1
    continue
  fi

  # Text output is empty for an empty list and "None" for a missing key.
  if [[ -n "${unsuccessful}" && "${unsuccessful}" != "None" ]]; then
    echo "Host ${HOST} was not released (reported as unsuccessful: ${unsuccessful})." >&2
    failed=1
  else
    echo "Released host ${HOST}"
  fi
done

exit "${failed}"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"path": "ipi/deprovision/aws/dedicated-hosts/ipi-deprovision-aws-dedicated-hosts-ref.yaml",
3+
"owners": {
4+
"approvers": [
5+
"jhixson74",
6+
"patrickdillon",
7+
"barbacbd"
8+
]
9+
}
10+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
ref:
2+
as: ipi-deprovision-aws-dedicated-hosts
3+
from: upi-installer
4+
grace_period: 10m
5+
commands: ipi-deprovision-aws-dedicated-hosts-commands.sh
6+
resources:
7+
requests:
8+
cpu: 300m
9+
memory: 300Mi
10+
documentation: |-
11+
This deprovision step tears down any dedicated hosts that were provisioned for the AWS IPI dedicated host feature.

0 commit comments

Comments
 (0)