Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OLM install and upgrade PR check for CodeFlare stack #181

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions .github/actions/kind/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: "Set up KinD"
description: "Step to start and configure KinD cluster"

runs:
using: "composite"
steps:
- name: Init directories
shell: bash
run: |
TEMP_DIR="$(pwd)/tmp"
mkdir -p "${TEMP_DIR}"
echo "TEMP_DIR=${TEMP_DIR}" >> $GITHUB_ENV
mkdir -p "$(pwd)/bin"
echo "$(pwd)/bin" >> $GITHUB_PATH
- name: Container image registry
shell: bash
run: |
podman run -d -p 5000:5000 --name registry registry:2.8.1
export REGISTRY_ADDRESS=$(hostname -i):5000
echo "REGISTRY_ADDRESS=${REGISTRY_ADDRESS}" >> $GITHUB_ENV
echo "Container image registry started at ${REGISTRY_ADDRESS}"
KIND_CONFIG_FILE=${{ env.TEMP_DIR }}/kind.yaml
echo "KIND_CONFIG_FILE=${KIND_CONFIG_FILE}" >> $GITHUB_ENV
envsubst < .github/resources-kind/kind.yaml > ${KIND_CONFIG_FILE}
sudo --preserve-env=REGISTRY_ADDRESS sh -c 'cat > /etc/containers/registries.conf.d/local.conf <<EOF
[[registry]]
prefix = "$REGISTRY_ADDRESS"
insecure = true
location = "$REGISTRY_ADDRESS"
EOF'
- name: Setup KinD cluster
uses: helm/[email protected]
with:
cluster_name: cluster
version: v0.17.0
config: ${{ env.KIND_CONFIG_FILE }}

- name: Print cluster info
shell: bash
run: |
echo "KinD cluster:"
kubectl cluster-info
kubectl describe nodes
File renamed without changes.
12 changes: 12 additions & 0 deletions .github/resources-olm-upgrade/catalogsource.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: operators.coreos.com/v1alpha1
kind: CatalogSource
metadata:
name: codeflare-olm-test
namespace: olm
spec:
displayName: ''
grpcPodConfig:
securityContextConfig: restricted
image: "${CATALOG_BASE_IMG}"
publisher: ''
sourceType: grpc
5 changes: 5 additions & 0 deletions .github/resources-olm-upgrade/operatorgroup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
name: openshift-operators
namespace: openshift-operators
12 changes: 12 additions & 0 deletions .github/resources-olm-upgrade/subscription.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: codeflare-operator
namespace: openshift-operators
spec:
channel: alpha
installPlanApproval: Automatic
name: codeflare-operator
source: codeflare-olm-test
sourceNamespace: olm
startingCSV: codeflare-operator.${PREVIOUS_VERSION}
42 changes: 2 additions & 40 deletions .github/workflows/e2e_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,6 @@ jobs:
with:
submodules: recursive

- name: Init directories
run: |
TEMP_DIR="$(pwd)/tmp"
mkdir -p "${TEMP_DIR}"
echo "TEMP_DIR=${TEMP_DIR}" >> $GITHUB_ENV

mkdir -p "$(pwd)/bin"
echo "$(pwd)/bin" >> $GITHUB_PATH

- name: Set Go
uses: actions/setup-go@v3
with:
Expand All @@ -73,37 +64,8 @@ jobs:
with:
token: ${{ secrets.GITHUB_TOKEN }}

- name: Container image registry
run: |
podman run -d -p 5000:5000 --name registry registry:2.8.1

export REGISTRY_ADDRESS=$(hostname -i):5000
echo "REGISTRY_ADDRESS=${REGISTRY_ADDRESS}" >> $GITHUB_ENV
echo "Container image registry started at ${REGISTRY_ADDRESS}"

KIND_CONFIG_FILE=${{ env.TEMP_DIR }}/kind.yaml
echo "KIND_CONFIG_FILE=${KIND_CONFIG_FILE}" >> $GITHUB_ENV
envsubst < ./test/e2e/kind.yaml > ${KIND_CONFIG_FILE}

sudo --preserve-env=REGISTRY_ADDRESS sh -c 'cat > /etc/containers/registries.conf.d/local.conf <<EOF
[[registry]]
prefix = "$REGISTRY_ADDRESS"
insecure = true
location = "$REGISTRY_ADDRESS"
EOF'

- name: Setup KinD cluster
uses: helm/[email protected]
with:
cluster_name: cluster
version: v0.17.0
config: ${{ env.KIND_CONFIG_FILE }}

- name: Print cluster info
run: |
echo "KinD cluster:"
kubectl cluster-info
kubectl describe nodes
- name: Setup and start KinD cluster
uses: ./.github/actions/kind

- name: Deploy CodeFlare stack
id: deploy
Expand Down
196 changes: 196 additions & 0 deletions .github/workflows/olm_tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# This workflow will build the CodeFlare Operator image and catalog containing bundle with this image, execute OLM upgrade tests using this catalog

name: OLM Install and Upgrade

on:
pull_request:
branches:
- main
- 'release-*'
paths-ignore:
- 'docs/**'
- '**.adoc'
- '**.md'
- 'LICENSE'

concurrency:
group: ${{ github.head_ref }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
kubernetes:
runs-on: ubuntu-20.04
timeout-minutes: 60
env:
OLM_VERSION: v0.24.0
VERSION: "v0.0.0-ghaction" # Need to supply some semver version for bundle to be properly generated
CATALOG_BASE_IMG: "registry.access.redhat.com/redhat/community-operator-index:v4.13"
CODEFLARE_TEST_TIMEOUT_SHORT: "1m"
CODEFLARE_TEST_TIMEOUT_MEDIUM: "3m"
CODEFLARE_TEST_TIMEOUT_LONG: "10m"

steps:
- name: Cleanup
run: |
ls -lart
echo "Initial status:"
df -h

echo "Cleaning up resources:"
sudo swapoff -a
sudo rm -f /swapfile
sudo apt clean
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
docker rmi $(docker image ls -aq)

echo "Final status:"
df -h

- uses: actions/checkout@v3
with:
fetch-depth: 0 # fetching also previous commits to get tags

- name: Set Go
uses: actions/setup-go@v3
with:
go-version: v1.18

- name: Set up gotestfmt
uses: gotesttools/gotestfmt-action@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}

- name: Store latest tag on branch
run: |
echo "PREVIOUS_VERSION=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV

- name: Setup and start KinD cluster
uses: ./.github/actions/kind

- name: Install OLM
run: |
kubectl create -f https://github.com/operator-framework/operator-lifecycle-manager/releases/download/${OLM_VERSION}/crds.yaml
# wait for a while to be sure CRDs are installed
sleep 1
kubectl create -f https://github.com/operator-framework/operator-lifecycle-manager/releases/download/${OLM_VERSION}/olm.yaml

- name: Create openshift-operator namespace and OperatorGroup
run: |
# Need to use openshift-operator namespace due to https://github.com/project-codeflare/codeflare-operator/issues/161
kubectl create namespace openshift-operators
kubectl create -f .github/resources-olm-upgrade/operatorgroup.yaml

- name: Deploy latest released CodeFlare operator from OLM
id: deploy
run: |
echo Deploying CodeFlare operator using Subscription
envsubst < .github/resources-olm-upgrade/catalogsource.yaml > ${{ env.TEMP_DIR }}/catalogsource.yaml
envsubst < .github/resources-olm-upgrade/subscription.yaml > ${{ env.TEMP_DIR }}/subscription.yaml
kubectl create -f ${{ env.TEMP_DIR }}/catalogsource.yaml
kubectl create -f ${{ env.TEMP_DIR }}/subscription.yaml

echo Waiting for Subscription to be ready
make wait-for-subscription

echo Waiting for Deployment to be ready
make wait-for-deployment -e TIMEOUT=60 -e DEPLOYMENT_NAME="codeflare-operator-manager" -e DEPLOYMENT_NAMESPACE="openshift-operators"

echo Checking that correct CSV is available
CSV_VERSION=$(kubectl get ClusterServiceVersion/codeflare-operator.${PREVIOUS_VERSION} -n openshift-operators -o json | jq -r .spec.version)
if [ "v${CSV_VERSION}" != "${PREVIOUS_VERSION}" ]; then
echo "CSV version v${CSV_VERSION} doesn't match expected version ${PREVIOUS_VERSION}"
exit 1
fi
env:
SUBSCRIPTION_NAME: "codeflare-operator"
SUBSCRIPTION_NAMESPACE: "openshift-operators"

- name: Deploy CodeFlare stack (MCAD, KubeRay)
run: |
make setup-e2e

- name: Run e2e tests
run: |
export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
echo "CODEFLARE_TEST_OUTPUT_DIR=${CODEFLARE_TEST_OUTPUT_DIR}" >> $GITHUB_ENV

set -euo pipefail
go test -timeout 30m -v ./test/e2e -json 2>&1 | tee ${CODEFLARE_TEST_OUTPUT_DIR}/gotest-original.log | gotestfmt

- name: Build operator and catalog image
run: |
make image-push
make bundle-build
make bundle-push
make catalog-build-from-index
make catalog-push
env:
IMG: "${{ env.REGISTRY_ADDRESS }}/codeflare-operator:v0.0.1"
BUNDLE_IMG: "${{ env.REGISTRY_ADDRESS }}/codeflare-operator-bundle:v0.0.1"
CATALOG_IMG: "${{ env.REGISTRY_ADDRESS }}/codeflare-operator-catalog:v0.0.1"
OPM_BUNDLE_OPT: "--use-http"
BUNDLE_PUSH_OPT: "--tls-verify=false"
CATALOG_PUSH_OPT: "--tls-verify=false"

- name: Update Operator to the built version
run: |
ORIGINAL_POD_NAME=$(kubectl get pod -l app.kubernetes.io/name=codeflare-operator -n openshift-operators -o json | jq -r .items[].metadata.name)
echo "Running old operator pod name is ${ORIGINAL_POD_NAME}"

echo Updating custom CatalogSource image to the built CatalogSource with latest operator
kubectl patch CatalogSource codeflare-olm-test -n olm --type merge --patch "{\"spec\":{\"image\":\"${CATALOG_IMG}\"}}"

echo Waiting for previous operator pod to get deleted
kubectl wait --timeout=120s --for=delete pod/${ORIGINAL_POD_NAME} -n openshift-operators

echo Waiting for Subscription to be ready
make wait-for-subscription

echo Waiting for Deployment to be ready
make wait-for-deployment -e TIMEOUT=60 -e DEPLOYMENT_NAME="codeflare-operator-manager" -e DEPLOYMENT_NAMESPACE="openshift-operators"

echo Checking that correct CSV is available
CSV_VERSION=$(kubectl get ClusterServiceVersion/codeflare-operator.${VERSION} -n openshift-operators -o json | jq -r .spec.version)
if [ "v${CSV_VERSION}" != "${VERSION}" ]; then
echo "CSV version v${CSV_VERSION} doesn't match expected version ${VERSION}"
exit 1
fi
env:
CATALOG_IMG: "${{ env.REGISTRY_ADDRESS }}/codeflare-operator-catalog:v0.0.1"
SUBSCRIPTION_NAME: "codeflare-operator"
SUBSCRIPTION_NAMESPACE: "openshift-operators"

- name: Run e2e tests against built operator
run: |
set -euo pipefail
go test -timeout 30m -v ./test/e2e -json 2>&1 | tee ${CODEFLARE_TEST_OUTPUT_DIR}/gotest-upgraded.log | gotestfmt

- name: Print CodeFlare operator logs
if: always() && steps.deploy.outcome == 'success'
run: |
echo "Printing CodeFlare operator logs"
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${CODEFLARE_TEST_OUTPUT_DIR}/codeflare-operator.log

- name: Print MCAD controller logs
if: always() && steps.deploy.outcome == 'success'
run: |
echo "Printing MCAD controller logs"
kubectl logs -n codeflare-system --tail -1 -l component=multi-cluster-application-dispatcher | tee ${CODEFLARE_TEST_OUTPUT_DIR}/mcad.log

- name: Print KubeRay operator logs
if: always() && steps.deploy.outcome == 'success'
run: |
echo "Printing KubeRay operator logs"
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log

- name: Upload logs
uses: actions/upload-artifact@v3
if: always() && steps.deploy.outcome == 'success'
with:
name: logs
retention-days: 10
path: |
${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log
30 changes: 28 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ bundle-build: bundle ## Build the bundle image.

.PHONY: bundle-push
bundle-push: ## Push the bundle image.
$(MAKE) image-push IMG=$(BUNDLE_IMG)
podman push $(BUNDLE_IMG) $(BUNDLE_PUSH_OPT)

.PHONY: openshift-community-operator-release
openshift-community-operator-release: install-gh-cli bundle ## build bundle and create PR in OpenShift community operators repository
Expand Down Expand Up @@ -413,10 +413,21 @@ endif
catalog-build: opm ## Build a catalog image.
$(OPM) index add --container-tool podman --mode semver --tag $(CATALOG_IMG) --bundles $(BUNDLE_IMGS) $(FROM_INDEX_OPT)

# Build a catalog image by adding bundle images to existing catalog using the operator package manager tool, 'opm'.
.PHONY: catalog-build-from-index
catalog-build-from-index: opm ## Build a catalog image.
mkdir catalog
$(OPM) render $(CATALOG_BASE_IMG) -o yaml > catalog/bundles.yaml
$(OPM) render $(BUNDLE_IMG) $(OPM_BUNDLE_OPT) > catalog/codeflare-operator-bundle.yaml
sed -i -E "s/(.*)(- name: codeflare-operator.$(PREVIOUS_VERSION).*)/\1- name: codeflare-operator.$(VERSION)\n replaces: codeflare-operator.$(PREVIOUS_VERSION)\n\2/" catalog/bundles.yaml
$(OPM) validate catalog
$(OPM) generate dockerfile catalog
podman build . -f catalog.Dockerfile -t $(CATALOG_IMG)

# Push the catalog image.
.PHONY: catalog-push
catalog-push: ## Push a catalog image.
$(MAKE) image-push IMG=$(CATALOG_IMG)
podman push $(CATALOG_IMG) $(CATALOG_PUSH_OPT)

.PHONY: test-unit
test-unit: defaults manifests generate fmt vet envtest ## Run unit tests.
Expand All @@ -429,3 +440,18 @@ test-e2e: defaults manifests generate fmt vet ## Run e2e tests.
.PHONY: setup-e2e
setup-e2e: ## Set up e2e tests.
KUBERAY_VERSION=$(KUBERAY_VERSION) test/e2e/setup.sh

# Wait until Subscription is in "AtLatestKnown" state
.PHONY: wait-for-subscription
wait-for-subscription:
timeout 300 bash -c 'while [[ "$$(kubectl get subscription/'$(SUBSCRIPTION_NAME)' -n '$(SUBSCRIPTION_NAMESPACE)' -o json | jq -r .status.state)" != "AtLatestKnown" ]]; do sleep 5 && echo "$$(kubectl get subscription/'$(SUBSCRIPTION_NAME)' -n '$(SUBSCRIPTION_NAMESPACE)' -o json | jq -r .status.state)" ; done'

# Default timeout for waiting actions
TIMEOUT ?= 180

# Wait until Deployment is in "Available" state
.PHONY: wait-for-deployment
wait-for-deployment:
# Wait until Deployment exists first, then use kubectl wait
timeout $(TIMEOUT) bash -c 'until [[ $$(kubectl get deployment/'$(DEPLOYMENT_NAME)' -n '$(DEPLOYMENT_NAMESPACE)') ]]; do sleep 5 && echo "$$(kubectl get deployment/'$(DEPLOYMENT_NAME)' -n '$(DEPLOYMENT_NAMESPACE)')"; done'
kubectl wait --timeout=$(TIMEOUT)s --for=condition=Available=true deployment/$(DEPLOYMENT_NAME) -n $(DEPLOYMENT_NAMESPACE)