-
Notifications
You must be signed in to change notification settings - Fork 50
150 lines (131 loc) · 6.32 KB
/
deploy-to-eks.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Workflow: provisions an EKS cluster, deploys Charmed Kubeflow (CKF) on it
# and runs the bundle tests. Runs on a weekly schedule or on manual dispatch.
name: Create EKS cluster, deploy CKF and run bundle test

on:
  # Manual trigger from the GitHub UI.
  workflow_dispatch:
    inputs:
      bundle_version:
        # Spliced verbatim into a JSON array by the job matrix, so every
        # entry must be a double-quoted string.
        description: 'Comma-separated list of bundle versions e.g. "1.7","1.8"'
        default: '"1.7","1.8","latest"'
        required: true
      k8s_version:
        # Optional override; when empty the job picks a version per bundle.
        description: 'Kubernetes version to be used for the EKS cluster'
        required: false
      uats_branch:
        description: 'Branch to run the UATs from e.g. main or track/1.8. By default, the workflow uses main.'
        required: false
  # Weekly run: 00:23 on Tuesdays (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "23 0 * * 2"
jobs:
  # For each requested bundle version: create a dedicated EKS cluster,
  # bootstrap Juju on it, run the bundle deployment test and the Kubeflow
  # UATs, collect debug output on failure, and always delete the cluster.
  deploy-ckf-to-eks:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        # Falls back to the default list on scheduled runs, where `inputs`
        # is empty.
        bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.7","1.8","latest"')) }}
    env:
      # Labels for the supported bundle tracks. The lookup tables below
      # repeat these values literally because the `env` context is not
      # available inside `jobs.<job_id>.env`, and shell-style `$NAME` is
      # never expanded inside an expression string.
      PREVIOUS_VERSION: "1.7"
      CURRENT_VERSION: "1.8"
      LATEST: "latest"
      # Matrix value exposed to the shell so scripts never interpolate
      # `${{ ... }}` directly into commands.
      BUNDLE_VERSION: ${{ matrix.bundle_version }}
      # Per-bundle lookups, keyed by the matrix bundle version.
      K8S_VERSION: ${{ inputs.k8s_version || fromJSON('{"1.7":"1.24","1.8":"1.26","latest":"1.29"}')[matrix.bundle_version] }}
      JUJU_VERSION: ${{ fromJSON('{"1.7":"2.9.49","1.8":"3.5.0","latest":"3.5.0"}')[matrix.bundle_version] }}
      UATS_BRANCH: ${{ inputs.uats_branch || fromJSON('{"1.7":"track/1.7","1.8":"main","latest":"main"}')[matrix.bundle_version] }}
      PYTHON_VERSION: "3.8"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Remove once https://github.com/canonical/bundle-kubeflow/issues/761
      # is resolved and applied to uats repository.
      - name: Install python ${{ env.PYTHON_VERSION }}
        run: |
          sudo add-apt-repository ppa:deadsnakes/ppa -y
          sudo apt update -y
          sudo apt install "python${PYTHON_VERSION}" "python${PYTHON_VERSION}-distutils" "python${PYTHON_VERSION}-venv" -y

      - name: Install CLI tools
        run: |
          # NOTE(review): the unversioned get-pip.py may reject Python 3.8;
          # confirm, and switch to the version-specific script if it does.
          wget https://bootstrap.pypa.io/get-pip.py
          "python${PYTHON_VERSION}" get-pip.py
          "python${PYTHON_VERSION}" -m pip install tox
          sudo snap install charmcraft --classic
          # We need to install from binary because of https://bugs.launchpad.net/juju/+bug/2007848
          # The download path uses the major.minor series, so strip the patch.
          JUJU_VERSION_WITHOUT_PATCH="${JUJU_VERSION%.*}"
          curl -LO "https://launchpad.net/juju/${JUJU_VERSION_WITHOUT_PATCH}/${JUJU_VERSION}/+download/juju-${JUJU_VERSION}-linux-amd64.tar.xz"
          tar xf "juju-${JUJU_VERSION}-linux-amd64.tar.xz"
          sudo install -o root -g root -m 0755 juju /usr/local/bin/juju
          juju version

      - name: Configure AWS Credentials
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.BUNDLE_KUBEFLOW_EKS_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.BUNDLE_KUBEFLOW_EKS_AWS_SECRET_ACCESS_KEY }}
        run: |
          aws configure set aws_access_key_id "$AWS_ACCESS_KEY_ID"
          aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY"
          aws configure set default.region eu-central-1

      - name: Install kubectl
        run: |
          sudo snap install kubectl --classic --channel="${K8S_VERSION}/stable"
          # -p: do not fail when the directory already exists.
          mkdir -p ~/.kube
          kubectl version --client

      - name: Install eksctl
        run: |
          sudo apt-get update
          sudo apt-get install -y unzip
          curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
          sudo mv /tmp/eksctl /usr/local/bin
          eksctl version

      - name: Create cluster
        run: |
          # Cluster name is kubeflow-test-<bundle version with dots removed>;
          # the "Delete EKS cluster" step derives the same name.
          VERSION_WITHOUT_DOT="${BUNDLE_VERSION//./}"
          yq e ".metadata.name |= \"kubeflow-test-${VERSION_WITHOUT_DOT}\"" -i .github/cluster.yaml
          yq e ".metadata.version |= \"${K8S_VERSION}\"" -i .github/cluster.yaml
          eksctl create cluster -f .github/cluster.yaml
          kubectl get nodes

      - name: Setup juju
        run: |
          juju add-k8s eks --client
          juju bootstrap eks kubeflow-controller
          juju add-model kubeflow

      - name: Test bundle deployment
        run: |
          tox -vve "test_bundle_deployment-${BUNDLE_VERSION}" -- --model kubeflow --keep-models -vv -s

      - name: Run Kubeflow UATs
        run: |
          git clone https://github.com/canonical/charmed-kubeflow-uats.git ~/charmed-kubeflow-uats
          cd ~/charmed-kubeflow-uats
          git checkout "${UATS_BRANCH}"
          tox -e kubeflow-remote

      # On failure or cancellation, capture debugging resources.
      - name: Save debug artifacts
        if: failure() || cancelled()
        uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main

      - name: Get juju status
        if: failure() || cancelled()
        run: juju status

      - name: Get juju debug logs
        if: failure() || cancelled()
        run: juju debug-log --replay --no-tail

      - name: Get all kubernetes resources
        if: failure() || cancelled()
        run: kubectl get all -A

      # In the three steps below, `xargs -r` skips kubectl entirely when no
      # pod matches, instead of invoking `kubectl logs` without a pod name.
      - name: Get logs from pods with status = Pending
        if: failure() || cancelled()
        run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -r -n1 kubectl -n kubeflow logs --all-containers=true --tail 100

      - name: Get logs from pods with status = Failed
        if: failure() || cancelled()
        run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -r -n1 kubectl -n kubeflow logs --all-containers=true --tail 100

      - name: Get logs from pods with status = CrashLoopBackOff
        if: failure() || cancelled()
        run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -r -n1 kubectl -n kubeflow logs --all-containers=true --tail 100

      # Runs regardless of the outcome of the earlier steps.
      - name: Delete EKS cluster
        if: always()
        run: |
          VERSION_WITHOUT_DOT="${BUNDLE_VERSION//./}"
          eksctl delete cluster --region eu-central-1 --name="kubeflow-test-${VERSION_WITHOUT_DOT}"

  # Calls the reusable volume-cleanup workflow once every matrix leg of the
  # deployment job has finished, whatever its result.
  delete-unattached-volumes:
    needs: [deploy-ckf-to-eks]
    if: always()
    uses: ./.github/workflows/delete-aws-volumes.yaml
    secrets: inherit
    with:
      region: eu-central-1