From 00ec1ef1b98ffb3a9e2b1549d12761daaec49ec1 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Mon, 19 Sep 2022 18:00:40 +0200 Subject: [PATCH 1/7] Add hacking files This patch adds a series of files to the `hack` directory to facilitate the development. These files include: - Helper functions - Script to deploy a local toy Ceph cluster - Script to create the LVM loopback device inside the OpenShift VM - OpenStack and Cinder client installation and configuration files - Manifest for OpenStack with Cinder and Glance using local Ceph cluster --- hack/README.md | 59 +++++++++++ hack/dev/admin-rc | 23 +++++ hack/dev/ceph/I_AM_A_DEMO | 0 hack/dev/ceph/ceph.client.admin.keyring | 6 ++ hack/dev/ceph/ceph.conf | 16 +++ hack/dev/ceph/ceph.mon.keyring | 9 ++ hack/dev/clouds.yaml | 12 +++ hack/dev/create-ceph.sh | 15 +++ hack/dev/create-lvm.sh | 19 ++++ hack/dev/helpers.sh | 13 +++ hack/dev/openstack-ceph.yaml | 114 +++++++++++++++++++++ hack/dev/openstack-lvm-ceph.yaml | 131 ++++++++++++++++++++++++ hack/dev/osp-clients-cfg.sh | 18 ++++ 13 files changed, 435 insertions(+) create mode 100644 hack/README.md create mode 100644 hack/dev/admin-rc create mode 100644 hack/dev/ceph/I_AM_A_DEMO create mode 100644 hack/dev/ceph/ceph.client.admin.keyring create mode 100644 hack/dev/ceph/ceph.conf create mode 100644 hack/dev/ceph/ceph.mon.keyring create mode 100644 hack/dev/clouds.yaml create mode 100755 hack/dev/create-ceph.sh create mode 100755 hack/dev/create-lvm.sh create mode 100644 hack/dev/helpers.sh create mode 100644 hack/dev/openstack-ceph.yaml create mode 100644 hack/dev/openstack-lvm-ceph.yaml create mode 100644 hack/dev/osp-clients-cfg.sh diff --git a/hack/README.md b/hack/README.md new file mode 100644 index 00000000..bbd0c223 --- /dev/null +++ b/hack/README.md @@ -0,0 +1,59 @@ +# Hacking + +### Ceph cluster + +As describe in the [Getting Started Guide](../README.md#getting-started), the +`dev/create-ceph.sh` script can help us create a *toy* Ceph cluster we can use 
+for development.
+
+### LVM backend
+
+Similar to the script that creates a *toy* Ceph backend there is also a script
+called `dev/create-lvm.sh` that creates an LVM Cinder VG, within the CRC VM,
+that can be used by the Cinder LVM backend driver.
+
+### Helpers
+
+If we source `hack/dev/helpers.sh` we'll get a couple of helper functions:
+
+- `crc_login`: To log in to the OpenShift cluster.
+- `crc_ssh`: To SSH to the OpenShift VM or to run SSH commands in it.
+
+### SSH OpenShift VM
+
+We can SSH into the OpenShift VM multiple ways: Using `oc debug`, using `ssh`,
+or using the `virsh console`.
+
+With `oc debug`:
+
+```sh
+$ oc get node
+NAME STATUS ROLES AGE VERSION
+crc-p9hmx-master-0 Ready master,worker 26d v1.24.0+4f0dd4d
+
+$ oc debug node/crc-p9hmx-master-0
+
+sh-4.4# chroot /host
+```
+
+To use `ssh` we can do:
+
+```sh
+$ ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.crc/machines/crc/id_ecdsa core@`crc ip`
+
+[core@crc-p9hmx-master-0 ~]$
+```
+
+Or we can use the helper function defined before to just do `crc_ssh`.
+
+### Containers in VM
+
+The OpenShift VM runs CoreOS and uses [Cri-O](https://cri-o.io/) as the
+container runtime, so once we are inside the VM we need to use `crictl`
+to interact with the containers:
+
+```sh
+[core@crc-p9hmx-master-0 ~]$ sudo crictl ps
+```
+
+And its configuration files are under `/etc/containers`.
diff --git a/hack/dev/admin-rc b/hack/dev/admin-rc
new file mode 100644
index 00000000..e366b614
--- /dev/null
+++ b/hack/dev/admin-rc
@@ -0,0 +1,23 @@
+# Clear any old environment that may conflict.
+for key in $( set | awk -F= '/^OS_/ {print $1}' ); do unset "${key}" ; done + +export OS_AUTH_TYPE=password +export OS_PASSWORD=12345678 +export OS_AUTH_URL=http://keystone-public-openstack.apps-crc.testing +export OS_SYSTEM_SCOPE=all +export OS_USERNAME=admin +export OS_PROJECT_NAME=admin +export COMPUTE_API_VERSION=1.1 +export NOVA_VERSION=1.1 +export OS_NO_CACHE=True +export OS_CLOUDNAME=default +export OS_IDENTITY_API_VERSION='3' +export OS_USER_DOMAIN_NAME='Default' +export OS_PROJECT_DOMAIN_NAME='Default' +export OS_CACERT="/etc/pki/ca-trust/source/anchors/cm-local-ca.pem" +# Add OS_CLOUDNAME to PS1 +if [ -z "${CLOUDPROMPT_ENABLED:-}" ]; then + export PS1=${PS1:-""} + export PS1=\${OS_CLOUDNAME:+"(\$OS_CLOUDNAME)"}\ $PS1 + export CLOUDPROMPT_ENABLED=1 +fi diff --git a/hack/dev/ceph/I_AM_A_DEMO b/hack/dev/ceph/I_AM_A_DEMO new file mode 100644 index 00000000..e69de29b diff --git a/hack/dev/ceph/ceph.client.admin.keyring b/hack/dev/ceph/ceph.client.admin.keyring new file mode 100644 index 00000000..a07f2077 --- /dev/null +++ b/hack/dev/ceph/ceph.client.admin.keyring @@ -0,0 +1,6 @@ +[client.admin] + key = AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== + caps mds = "allow *" + caps mgr = "allow *" + caps mon = "allow *" + caps osd = "allow *" diff --git a/hack/dev/ceph/ceph.conf b/hack/dev/ceph/ceph.conf new file mode 100644 index 00000000..6662ff1f --- /dev/null +++ b/hack/dev/ceph/ceph.conf @@ -0,0 +1,16 @@ +[global] +fsid = 5fe62cc7-0392-4a32-8466-081ce0ea970f +mon initial members = localhost +mon host = v2:192.168.130.1:3300/0 +osd crush chooseleaf type = 0 +osd journal size = 100 +public network = 0.0.0.0/0 +cluster network = 0.0.0.0/0 +osd pool default size = 1 +mon warn on pool no redundancy = false +auth allow insecure global id reclaim = false +osd objectstore = bluestore + +[osd.0] +osd data = /var/lib/ceph/osd/ceph-0 + diff --git a/hack/dev/ceph/ceph.mon.keyring b/hack/dev/ceph/ceph.mon.keyring new file mode 100644 index 00000000..2724a18d --- /dev/null +++ 
b/hack/dev/ceph/ceph.mon.keyring @@ -0,0 +1,9 @@ +[mon.] + key = AQBCtBhjSAiDFhAAiNDfWsKMES1krJAye5sk0Q== + caps mon = "allow *" +[client.admin] + key = AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== + caps mds = "allow *" + caps mgr = "allow *" + caps mon = "allow *" + caps osd = "allow *" diff --git a/hack/dev/clouds.yaml b/hack/dev/clouds.yaml new file mode 100644 index 00000000..e7b897aa --- /dev/null +++ b/hack/dev/clouds.yaml @@ -0,0 +1,12 @@ +clouds: + default: + auth: + auth_url: http://keystone-public-openstack.apps-crc.testing + project_name: admin + username: admin + user_domain_name: Default + project_domain_name: Default + region_name: regionOne + verify: false + identity_api_version: '3' + diff --git a/hack/dev/create-ceph.sh b/hack/dev/create-ceph.sh new file mode 100755 index 00000000..f1871d9f --- /dev/null +++ b/hack/dev/create-ceph.sh @@ -0,0 +1,15 @@ +#!/bin/env bash +LOCATION=$(realpath `dirname -- $BASH_SOURCE[0]`) +sudo cp -R "${LOCATION}/ceph" /etc + +# Change Ceph default features (if we want to attach using krbd) +# echo -e "\nrbd default features = 3" | sudo tee -a /etc/ceph/ceph.conf + +echo 'Running ceph Pacific demo cluster' +sudo podman run -d --name ceph --net=host -v /etc/ceph:/etc/ceph:z -v /lib/modules:/lib/modules -e MON_IP=192.168.130.1 -e CEPH_PUBLIC_NETWORK=0.0.0.0/0 -e DEMO_DAEMONS='osd' quay.io/ceph/daemon:latest-pacific demo + +sleep 3 + +sudo podman exec -it ceph bash -c 'ceph osd pool create volumes 4 && ceph osd pool application enable volumes rbd' +sudo podman exec -it ceph bash -c 'ceph osd pool create backups 4 && ceph osd pool application enable backups rbd' +sudo podman exec -it ceph bash -c 'ceph osd pool create images 4 && ceph osd pool application enable images rgw' diff --git a/hack/dev/create-lvm.sh b/hack/dev/create-lvm.sh new file mode 100755 index 00000000..dbf74da8 --- /dev/null +++ b/hack/dev/create-lvm.sh @@ -0,0 +1,19 @@ +#!/bin/env bash +set -ev +set -x + +LOCATION=$(realpath `dirname -- $BASH_SOURCE[0]`) 
+source "$LOCATION/helpers.sh" + +# Enable iSCSI because it always fails to start for some reason, and just to be +# extra sure create the initiator name if it doesn't exist. +crc_ssh 'if [[ ! -e /etc/iscsi/initiatorname.iscsi ]]; then echo InitiatorName=`iscsi-iname` | sudo tee /etc/iscsi/initiatorname.iscsi; fi; if ! systemctl --no-pager status iscsid; then sudo systemctl restart iscsid; fi' + +# Multipath failed to start because it doesn't have a configuration, create it +# and restart the service +crc_ssh 'if [[ ! -e /etc/multipath.conf ]]; then sudo mpathconf --enable --with_multipathd y --user_friendly_names n --find_multipaths y && sudo systemctl start multipathd; fi' + +loopback_file="/var/home/core/cinder-volumes" +echo Creating $loopback_file in CRC VM +crc_ssh "if [[ ! -e $loopback_file ]]; then truncate -s 10G $loopback_file; fi" +crc_ssh "if ! sudo vgdisplay cinder-volumes; then sudo vgcreate cinder-volumes \`sudo losetup --show -f $loopback_file\` && sudo vgscan; fi" diff --git a/hack/dev/helpers.sh b/hack/dev/helpers.sh new file mode 100644 index 00000000..bf19805c --- /dev/null +++ b/hack/dev/helpers.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +function crc_ssh { + SSH_PARAMS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.crc/machines/crc/id_ecdsa" + SSH_REMOTE="core@`crc ip`" + ssh $SSH_PARAMS $SSH_REMOTE "$@" +} + + +function crc_login { + echo Logging in + oc login -u kubeadmin -p 12345678 https://api.crc.testing:6443 +} diff --git a/hack/dev/openstack-ceph.yaml b/hack/dev/openstack-ceph.yaml new file mode 100644 index 00000000..224d400d --- /dev/null +++ b/hack/dev/openstack-ceph.yaml @@ -0,0 +1,114 @@ +apiVersion: core.openstack.org/v1beta1 +kind: OpenStackControlPlane +metadata: + name: openstack +spec: + secret: osp-secret + storageClass: local-storage + keystone: + template: + containerImage: quay.io/tripleozedcentos9/openstack-keystone:current-tripleo + databaseInstance: openstack + secret: osp-secret + mariadb: + 
template: + containerImage: quay.io/tripleozedcentos9/openstack-mariadb:current-tripleo + storageRequest: 500M + rabbitmq: + template: + replicas: 1 + #resources: + # requests: + # cpu: 500m + # memory: 1Gi + # limits: + # cpu: 800m + # memory: 1Gi + placement: + enabled: false + template: + containerImage: quay.io/tripleozedcentos9/openstack-placement-api:current-tripleo + databaseInstance: openstack + secret: osp-secret + glance: + template: + serviceUser: glance + databaseInstance: openstack + databaseUser: glance + secret: osp-secret + storageClass: "" + storageRequest: 1G + customServiceConfig: | + [DEFAULT] + debug = true + glanceAPIInternal: + debug: + service: false + preserveJobs: false + replicas: 1 + glanceAPIExternal: + debug: + service: false + preserveJobs: false + replicas: 1 + cephBackend: + cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f + cephMons: 192.168.130.1 + cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== + cephUser: admin + cephPools: + glance: + name: images + cinder: + enabled: true + template: + rabbitMqClusterName: rabbitmq + customServiceConfig: | + [DEFAULT] + debug = true + databaseInstance: openstack + databaseUser: cinder + cinderAPI: + replicas: 1 + containerImage: quay.io/tripleozedcentos9/openstack-cinder-api:current-tripleo + debug: + initContainer: false + service: false + cinderScheduler: + replicas: 1 + containerImage: quay.io/tripleozedcentos9/openstack-cinder-scheduler:current-tripleo + debug: + initContainer: false + service: false + cinderBackup: + replicas: 1 + containerImage: quay.io/tripleozedcentos9/openstack-cinder-backup:current-tripleo + customServiceConfig: | + [DEFAULT] + backup_driver = cinder.backup.drivers.ceph.CephBackupDriver + backup_ceph_pool = backups + backup_ceph_user = admin + debug: + initContainer: false + service: false + secret: osp-secret + cinderVolumes: + ceph: + containerImage: quay.io/tripleozedcentos9/openstack-cinder-volume:current-tripleo + replicas: 1 + customServiceConfig: | + 
[DEFAULT] + enabled_backends=ceph + debug: + initContainer: false + service: false + cephBackend: + cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f + cephMons: 192.168.130.1 + cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== + cephUser: admin + cephPools: + cinder: + name: volumes + cinder_backup: + name: backup diff --git a/hack/dev/openstack-lvm-ceph.yaml b/hack/dev/openstack-lvm-ceph.yaml new file mode 100644 index 00000000..f4c20a77 --- /dev/null +++ b/hack/dev/openstack-lvm-ceph.yaml @@ -0,0 +1,131 @@ +apiVersion: core.openstack.org/v1beta1 +kind: OpenStackControlPlane +metadata: + name: openstack +spec: + secret: osp-secret + storageClass: local-storage + keystone: + template: + containerImage: quay.io/tripleozedcentos9/openstack-keystone:current-tripleo + databaseInstance: openstack + secret: osp-secret + mariadb: + template: + containerImage: quay.io/tripleozedcentos9/openstack-mariadb:current-tripleo + storageRequest: 500M + rabbitmq: + template: + replicas: 1 + #resources: + # requests: + # cpu: 500m + # memory: 1Gi + # limits: + # cpu: 800m + # memory: 1Gi + placement: + enabled: false + template: + containerImage: quay.io/tripleozedcentos9/openstack-placement-api:current-tripleo + databaseInstance: openstack + secret: osp-secret + glance: + template: + serviceUser: glance + databaseInstance: openstack + databaseUser: glance + secret: osp-secret + storageClass: "" + storageRequest: 1G + customServiceConfig: | + [DEFAULT] + debug = true + glanceAPIInternal: + debug: + service: false + preserveJobs: false + replicas: 1 + glanceAPIExternal: + debug: + service: false + preserveJobs: false + replicas: 1 + cephBackend: + cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f + cephMons: 192.168.130.1 + cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== + cephUser: admin + cephPools: + glance: + name: images + cinder: + enabled: true + template: + rabbitMqClusterName: rabbitmq + customServiceConfig: | + [DEFAULT] + debug = true + databaseInstance: openstack 
+ databaseUser: cinder + cinderAPI: + replicas: 1 + containerImage: quay.io/tripleozedcentos9/openstack-cinder-api:current-tripleo + debug: + initContainer: false + service: false + cinderScheduler: + replicas: 1 + containerImage: quay.io/tripleozedcentos9/openstack-cinder-scheduler:current-tripleo + debug: + initContainer: false + service: false + cinderBackup: + replicas: 1 + containerImage: quay.io/tripleozedcentos9/openstack-cinder-backup:current-tripleo + customServiceConfig: | + [DEFAULT] + backup_driver = cinder.backup.drivers.ceph.CephBackupDriver + backup_ceph_pool = backups + backup_ceph_user = admin + debug: + initContainer: false + service: false + secret: osp-secret + cinderVolumes: + lvm: + containerImage: quay.io/tripleozedcentos9/openstack-cinder-volume:current-tripleo + replicas: 1 + customServiceConfig: | + [DEFAULT] + enabled_backends=lvm + [lvm] + image_volume_cache_enabled=false + use_multipath_for_image_xfer=true + volume_driver=cinder.volume.drivers.lvm.LVMVolumeDriver + volume_group=cinder-volumes + target_protocol=iscsi + target_helper=lioadm + volume_backend_name=lvm_iscsi + debug: + initContainer: false + service: false + ceph: + containerImage: quay.io/tripleozedcentos9/openstack-cinder-volume:current-tripleo + replicas: 1 + customServiceConfig: | + [DEFAULT] + enabled_backends=ceph + debug: + initContainer: false + service: false + cephBackend: + cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f + cephMons: 192.168.130.1 + cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== + cephUser: admin + cephPools: + cinder: + name: volumes + cinder_backup: + name: backup diff --git a/hack/dev/osp-clients-cfg.sh b/hack/dev/osp-clients-cfg.sh new file mode 100644 index 00000000..1edc8dcf --- /dev/null +++ b/hack/dev/osp-clients-cfg.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +LOCATION=$(realpath `dirname -- $BASH_SOURCE[0]`) + +mkdir -p ~/.config/openstack + +if [[ ! 
-e ~/.config/openstack/clouds.yaml ]]; then + cp $LOCATION/clouds.yaml ~/.config/openstack/clouds.yaml +fi + +# Install OSC because some services no longer have individual clients +# This installs the cinderclient as well, which has feature parity with cinder +pip3 install openstackclient + +export OS_CLOUD=default +export OS_PASSWORD=12345678 + +source $LOCATION/admin-rc From 6975bbb776a00d60a9219315a5ff6fa0c719ec80 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Fri, 25 Nov 2022 19:19:14 +0100 Subject: [PATCH 2/7] Hack: Testing PR helpers This patch adds some scripts to facilitate the testing of PRs with dependencies. Four scripts are introduced: - hack/cleandeps.sh: Cleans up contents of go.work - hack/showdeps.py: Show the dependencies of a PR based on its commit messages contents (Depends-On:) - hack/setdeps.py: Replaces modules in the go workspace with local directories, PRs, remote branches, etc. - hack/checkout_pr.sh: Leverages the other scripts to do the checkout of a PR and replace the dependencies as needed. --- hack/README.md | 26 +++++++++ hack/checkout_pr.sh | 30 ++++++++++ hack/cleandeps.sh | 4 ++ hack/setdeps.py | 131 ++++++++++++++++++++++++++++++++++++++++++++ hack/showdeps.py | 97 ++++++++++++++++++++++++++++++++ 5 files changed, 288 insertions(+) create mode 100755 hack/checkout_pr.sh create mode 100755 hack/cleandeps.sh create mode 100755 hack/setdeps.py create mode 100755 hack/showdeps.py diff --git a/hack/README.md b/hack/README.md index bbd0c223..295c24c5 100644 --- a/hack/README.md +++ b/hack/README.md @@ -1,5 +1,31 @@ # Hacking +### Testing PRs + +To facilitate testing PRs we have the `hack/checkout_pr.sh` script that will +checkout the specified cinder PR into local branch pr#, then recursively check +the dependencies using the PRs commit messages and then replace modules in our +go workspace to use those dependencies. 
+
+Example to get PR#65 from cinder-operator:
+
+```sh
+$ hack/checkout_pr.sh 65
+Cleaning go.work
+Fetching PR 65 on upstream/pr65
+Checking PR dependecies
+Setting dependencies: lib-common=88
+Source for lib-common PR#88 is github.com/fmount/lib-common@extra_volumes
+Checking the go mod version for branch @extra_volumes
+go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/common=github.com/fmount/lib-common/modules/common@v0.0.0-20221123175721-3e11759d254f
+go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/database=github.com/fmount/lib-common/modules/database@v0.0.0-20221123175721-3e11759d254f
+go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/storage=github.com/fmount/lib-common/modules/storage@v0.0.0-20221123175721-3e11759d254f
+```
+
+This script leverages the script `hack/cleandeps.sh` that cleans existing
+dependencies, `hack/showdeps.py` that shows dependencies for a given PR, and
+`hack/setdeps.py` that sets the go workspace `replace` directives.
+ ### Ceph cluster As describe in the [Getting Started Guide](../README.md#getting-started), the diff --git a/hack/checkout_pr.sh b/hack/checkout_pr.sh new file mode 100755 index 00000000..1acddc09 --- /dev/null +++ b/hack/checkout_pr.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -e + +LOCATION=$(realpath `dirname -- $BASH_SOURCE[0]`) + +if [ $# -eq 0 ]; then + echo "Error, missing arguments: $0 [ []]" + exit 1 +fi + +PR=$1 +REMOTE=${2:-upstream} +BRANCH=${3:-pr$1} + +$LOCATION/cleandeps.sh + +# Get the code +echo Fetching PR $PR on $REMOTE/$BRANCH +git fetch $REMOTE pull/$PR/head:$BRANCH +git checkout $BRANCH + +# Get dependencies +echo Checking PR dependecies +deps=`$LOCATION/showdeps.py $PR` +if [[ -n "$deps" ]]; then + echo Setting dependencies: $deps + $LOCATION/setdeps.py $deps +else + echo PR has no dependencies +fi diff --git a/hack/cleandeps.sh b/hack/cleandeps.sh new file mode 100755 index 00000000..0ef73806 --- /dev/null +++ b/hack/cleandeps.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +LOCATION=$(realpath `dirname -- $BASH_SOURCE[0]`) +echo Cleaning go.work +echo -e "go 1.18\n\nuse (\n\t.\n\t./api\n)" > $LOCATION/../go.work diff --git a/hack/setdeps.py b/hack/setdeps.py new file mode 100755 index 00000000..95f71aac --- /dev/null +++ b/hack/setdeps.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +# Script to replace go module dependencies for Cinder: +# Supports 4 different formats: +# - By PR numbers: +# ./setdeps.py lib-common=88 openstack-operator=38 +# - By local directory where the local dir is already with the right code: +# ./setdeps.py lib-common=../lib-common \ +# openstack-operator=../openstack-operator +# - By upstream repository +# ./setdeps.py \ +# lib-common=https://github.com/fmount/lib-common/@extra_volumes \ +# openstack-operator=https://github.com/fmount/openstack-operator/@extra_volumes +# - By short upstream repository: +# ./setdeps.py lib-common=fmount/lib-common@extra_volumes \ +# openstack-operator=fmount/openstack-operator@extra_volumes 
+import json +import os +import re +import subprocess +import sys +import urllib.request + +GH_API_URL = 'https://api.github.com/repos/openstack-k8s-operators' + + +def get_modules(repo, requires): + """Return a list of tuples of (repository, module) for a specific repo.""" + result = [] + for require in requires: + try: + index = require.index('/' + repo) + + module_index = index + 1 + len(repo) + url = require[:module_index] + module = require[module_index:] + if module[0] == '/': + result.append((url, module)) + except ValueError: + continue + return result + + +def get_requires(): + """Get all requires modules from go.mod""" + # Get location of the go.mod file + result = subprocess.run('go env GOMOD', shell=True, check=True, + capture_output=True) + go_mod_path = result.stdout.strip() + # Use findall to get direct and indirect + require_sections = re.findall(r"^require \(\n(.*?)\n\)", + open(go_mod_path).read(), + re.MULTILINE | re.DOTALL) + if not require_sections: + raise Exception('Error parsing go.mod') + res = [] + for requires_section in require_sections: + lines = requires_section.split('\n') + res.extend(line.strip().split()[0] for line in lines if line) + return res + + +def main(args): + source_is = None + + requires = get_requires() + for arg in args: + repo = arg[0] + go_module_url = f'github.com/openstack-k8s-operators/{repo}' + + source_version = '' + + try: + pr = int(arg[1]) + source_is = 'PR' + except ValueError: + src_path = arg[1] + if os.path.exists(src_path): + source_is = 'LOCAL' + else: + source_is = 'REPO' + # Build url if we where just provided a partial repo + # eg: fpantano/lib-common@extravol + if not src_path.startswith('http'): + src_path = 'github.com/' + src_path + + if source_is == 'PR': + api_url = f'{GH_API_URL}/{repo}/pulls/{pr}' + + contents_str = urllib.request.urlopen(api_url).read() + contents = json.loads(contents_str) + source_version = '@' + contents['head']['ref'] + + src_path = 'github.com/' + 
contents['head']['repo']['full_name'] + print(f'Source for {repo} PR#{pr} is {src_path}{source_version}') + + elif source_is == 'LOCAL': + src_path = os.path.abspath(src_path) + print(f'Source repo for {repo} is {src_path}') + elif source_is == 'REPO': # is a repo + if '@' in src_path: + src_path, source_version = src_path.split('@') + source_version = '@' + source_version + print(f'Source repo for {repo} is {src_path}') + + if source_version: + print(f'Checking the go mod version for branch {source_version}') + sha = contents['head']['sha'][:12] + result = subprocess.run(f'go list -m -json {src_path}@{sha}', + shell=True, check=True, + capture_output=True) + source_version = '@' + json.loads(result.stdout)['Version'] + + modules = get_modules(repo, requires) + for go_module_url, module in modules: + cmd = (f'go work edit -replace {go_module_url}{module}=' + f'{src_path}{module}{source_version}') + print(cmd) + os.system(cmd) + print() + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print('Error, missing arguments') + exit(1) + + args = [] + for arg in sys.argv[1:]: + args.append(arg.split('=')) + + main(args) diff --git a/hack/showdeps.py b/hack/showdeps.py new file mode 100755 index 00000000..811e4780 --- /dev/null +++ b/hack/showdeps.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# Script to get dependencies recursively for a PR. 
+# PR message and commit messages must use Depends-On in one of these 4 formats: +# - Depends-On: lib-common=88 +# - Depends-On: openstack-k8s-operators/lib-common#88 +# - Depends-On: lib-common#88 +# - Depends-On: https://github.com/openstack-k8s-operators/lib-common/88 +# Output is consumable directly by the setdeps.py script +# $ ./showdeps PR# [repo] +# eg: +# $ ./showdeps 65 +# lib-common=88 +# +# $ ./showdeps 65 cinder-operator +# lib-common=88 + +import json +import re +import sys +import urllib.request + +GH_API_URL = 'https://api.github.com/repos/openstack-k8s-operators' +REPO = 'cinder-operator' +RECURSIVE = False + + +def get_gh_json(repo, pr, ending=''): + api_url = f'{GH_API_URL}/{repo}/pulls/{pr}{ending}' + contents_str = urllib.request.urlopen(api_url).read() + return json.loads(contents_str) + + +def find_dependencies(text): + result = [] + depends = re.findall(r"\n\s*Depends-On:\s*(\S+)\s*?", text, re.IGNORECASE) + for dep in depends: + # lib-common=88 + if '=' in dep: + res = dep.split('=') + # openstack-k8s-operators/lib-common#88 + # lib-common#88 + elif '#' in dep: + res = dep.rsplit('#', 1) + res[0] = res[0].rsplit('/', 1)[-1] + # https://github.com/openstack-k8s-operators/lib-common/88 + else: + r = dep.rsplit('/', 3) + if len(r) < 4: + sys.stderr.write(f'Wrong Depends-On on: {dep}\n') + continue + res = r[1], r[3] + result.append(res) + return result + + +def get_dependencies(repo, pr): + contents = get_gh_json(repo, pr) + pr_message = contents['body'] + # Initialize to the dependencies found in the PR message + result = find_dependencies(pr_message) + + # Find additional dependencies in commit messages + contents = get_gh_json(repo, pr, '/commits') + for commit in contents: + message = commit['commit']['message'] + deps = find_dependencies(message) + if deps: + result.extend(deps) + return result + + +def main(args): + pr = args[1] + repo = args[2] if len(args) > 2 else REPO + dependencies = [] + to_check = [(repo, pr)] + checked = [] + 
while to_check: + check = to_check.pop() + # Detect circular references + if check in checked: + continue + new_deps = get_dependencies(*check) + dependencies.extend(new_deps) + if not RECURSIVE: + break + to_check.extend(new_deps) + checked.append(check) + + if dependencies: + # Convert to str and remove duplicated dependencies + deps_str = set(f'{dep[0]}={dep[1]}' for dep in dependencies) + print(' '.join(deps_str)) + + +if __name__ == '__main__': + main(sys.argv) From 4dfb1e75b2d7b27fc882dc457ea10ebc0d836bdc Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Mon, 19 Sep 2022 18:00:57 +0200 Subject: [PATCH 3/7] Docs: Update README.md This patch updates the README.md file to be a bit more detailed in how we can get a working deployment with Ceph as a backend for Cinder and Glance. --- README.md | 293 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 191 insertions(+), 102 deletions(-) diff --git a/README.md b/README.md index 385e1178..4f674e97 100644 --- a/README.md +++ b/README.md @@ -1,149 +1,239 @@ -# cinder-operator -// TODO(user): Add simple overview of use/purpose - -## Description -// TODO(user): An in-depth paragraph about your project and overview of use +# CINDER-OPERATOR -## Getting Started -You’ll need a Kubernetes cluster to run against. Our recommendation for the time being is to use -[OpenShift Local](https://access.redhat.com/documentation/en-us/red_hat_openshift_local/2.2/html/getting_started_guide/installation_gsg) (formerly known as CRC / Code Ready Containers). -We have [companion development tools](https://github.com/openstack-k8s-operators/install_yamls/blob/master/devsetup/README.md) available that will install OpenShift Local for you. +The cinder-operator is an OpenShift Operator built using the Operator Framework +for Go. The Operator provides a way to easily install and manage an OpenStack +Cinder installation on OpenShift. This Operator was developed using RDO +containers for openStack. 
-### Running on the cluster -1. Install Instances of Custom Resources: +## Getting started + +**NOTES:** + +- *The project is in a rapid development phase and not yet intended for + production consumption, so instructions are meant for developers.* + +- *If possible don't run things in your own machine to avoid the risk of + affecting the development of your other projects.* + +Here we'll explain how to get a functiona OpenShift deployment running inside a +VM that is running MariaDB, RabbitMQ, KeyStone, Glance, and Cinder services +against a Ceph backend. + +There are 4 steps: + +- [Install prerequisites](#prerequisites) +- [Deploy an OpenShift cluster](#openshift-cluster) +- [Prepare Storage](#storage) +- [Deploy OpenStack](#deploy) + +### Prerequisites + +There are some tools that will be required through this process, so the first +thing we do is install them: ```sh -kubectl apply -f config/samples/ +sudo dnf install -y git wget make ansible-core python-pip podman gcc ``` -2. Build and push your image to the location specified by `IMG`: - +We'll also need this repository as well as `install_yamls`: + ```sh -make docker-build docker-push IMG=/cinder-operator:tag +cd ~ +git clone https://github.com/openstack-k8s-operators/install_yamls.git +git clone https://github.com/openstack-k8s-operators/cinder-operator.git ``` - -3. Deploy the controller to the cluster with the image specified by `IMG`: + +### OpenShift cluster + +There are many ways get an OpenShift cluster, and our recommendation for the +time being is to use [OpenShift Local](https://access.redhat.com/documentation/en-us/red_hat_openshift_local/2.5/html/getting_started_guide/index) +(formerly known as CRC / Code Ready Containers). + +To help with the deployment we have [companion development tools](https://github.com/openstack-k8s-operators/install_yamls/blob/master/devsetup) +available that will install OpenShift Local for you and will also help with +later steps. 
+ +Running OpenShift requires a considerable amount of resources, even more when +running all the operators and services required for an OpenStack deployment, +so make sure that you have enough resources in the machine to run everything. + +You will need at least 5 CPUS and 16GB of RAM, preferably more, just for the +local OpenShift VM. + +**You will also need to get your [pull-secrets from Red Hat]( +https://cloud.redhat.com/openshift/create/local) and store it in the machine, +for example on your home directory as `pull-secret`.** ```sh -make deploy IMG=/cinder-operator:tag +cd ~/install_yamls/devsetup +PULL_SECRET=~/pull-secret CPUS=6 MEMORY=20480 make download_tools crc ``` -### Uninstall CRDs -To delete the CRDs from the cluster: +This will take a while, but once it has completed you'll have an OpenShift +cluster ready. + +Now you need to set the right environmental variables for the OCP cluster, and +you may want to logging to the cluster manually (although the previous step +already logs in at the end): ```sh -make uninstall +eval $(crc oc-env) ``` -### Undeploy controller -UnDeploy the controller to the cluster: +**NOTE**: When CRC finishes the deployment the `oc` client is logged in, but +the token will eventually expire, in that case we can login again with +`oc login -u kubeadmin -p 12345678 https://api.crc.testing:6443`, or use the +[helper functions](CONTRIBUTING.md#helpful-scripts). + +Let's now get the cluster version confirming we have access to it: ```sh -make undeploy +oc get clusterversion ``` -### Configure Cinder with Ceph backend +If you are running OCP on a different machine you'll need additional steps to +[access its dashboard from an external system](https://github.com/openstack-k8s-operators/install_yamls/tree/master/devsetup#access-ocp-from-external-systems). + +### Storage -The Cinder spec API can be used to configure and customize the Ceph backend. 
In -particular, the `customServiceConfig` parameter should be used, for each -defined volume, to override the `enabled_backends` parameter, which must exist -in `cinder.conf` to make the `cinderVolume` pod run. The global `cephBackend` -parameter is used to specify the Ceph client-related "key/value" pairs required -to connect the service with an external Ceph cluster. Multiple external Ceph -clusters are not supported at the moment. The following represents an example -of the Cinder object that can be used to trigger the Cinder service deployment, -and enable the Cinder backend that points to an external Ceph cluster. +There are 2 kinds of storage we'll need: One for the pods to run, for example +for the MariaDB database files, and another for the OpenStack services to use +for the VMs. +To create the pod storage we run: + +```sh +cd ~/install_yamls +make crc_storage ``` -apiVersion: cinder.openstack.org/v1beta1 -kind: Cinder -metadata: - name: cinder - namespace: openstack -spec: - serviceUser: cinder - databaseInstance: openstack - databaseUser: cinder - cinderAPI: - replicas: 1 - containerImage: quay.io/tripleowallabycentos9/openstack-cinder-api:current-tripleo - cinderScheduler: - replicas: 1 - containerImage: quay.io/tripleowallabycentos9/openstack-cinder-scheduler:current-tripleo - cinderBackup: - replicas: 1 - containerImage: quay.io/tripleowallabycentos9/openstack-cinder-backup:current-tripleo - secret: cinder-secret - cinderVolumes: - volume1: - containerImage: quay.io/tripleowallabycentos9/openstack-cinder-volume:current-tripleo - replicas: 1 - customServiceConfig: | - [DEFAULT] - enabled_backends=ceph - cephBackend: - cephFsid: - cephMons: - cephClientKey: - cephUser: openstack - cephPools: - cinder: - name: volumes - nova: - name: vms - glance: - name: images - cinder_backup: - name: backup - extra_pool1: - name: ceph_ssd_tier - extra_pool2: - name: ceph_nvme_tier - extra_pool3: - name: ceph_hdd_tier + +As for the storage for the OpenStack services, at 
the time of this writing only +NFS and Ceph are supported. + +For simplicity's sake we'll use a *toy* Ceph cluster that runs in a single +local container using a simple script provided by this project. Beware that +this script overrides things under `/etc/ceph`: + +**NOTE**: This step must be run after the OpenShift VM is running because it +binds to an IP address created by it. + +```sh +~/cinder-operator/hack/dev/create-ceph.sh ``` -When the service is up and running, it's possible to interact with the cinder -API and create the Ceph `cinder type` backend which is associated with the Ceph -tier specified in the config file. +Using an external Ceph cluster is also possible, but out of the scope of this +document, and the manifest we'll use have been tailor made for this specific +*toy* Ceph cluster. +### Deploy -## Contributing -// TODO(user): Add detailed information on how you would like others to contribute to this project +Deploying the podified OpenStack control plane is a 2 step process. First +deploying the operators, and then telling the openstack-operator how we want +our OpenStack deployment to look like. -### How it works -This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) +Deploying the openstack operator: -It uses [Controllers](https://kubernetes.io/docs/concepts/architecture/controller/) -which provides a reconcile function responsible for synchronizing resources untile the desired state is reached on the cluster +```sh +cd ~/install_yamls +make openstack +``` -### Test It Out -1. Install the CRDs into the cluster: +Once all the operator ready we'll see the pod with: ```sh -make install +oc get pod -l control-plane=controller-manager ``` -2. 
Run your controller (this will run in the foreground, so switch to a new terminal if you want to leave it running): +And now we can tell this operator to deploy RabbitMQ, MariaDB, Keystone, Glance +and Cinder using the Ceph *toy* cluster: ```sh -make run +export OPENSTACK_CR=`realpath ~/cinder-operator/hack/dev/openstack-ceph.yaml` +cd ~/install_yamls +make openstack_deploy ``` -**NOTE:** You can also run this in one step by running: `make install run` +After a bit we can see the 5 operators are running: + +```sh +oc get pods -l control-plane=controller-manager +``` -### Modifying the API definitions -If you are editing the API definitions, generate the manifests such as CRs or CRDs using: +And a while later the services will also appear: ```sh -make manifests +oc get pods -l app=mariadb +oc get pods -l app.kubernetes.io/component=rabbitmq +oc get pods -l service=keystone +oc get pods -l service=glance +oc get pods -l service=cinder ``` -**NOTE:** Run `make --help` for more information on all potential `make` targets +### Configure Clients -More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html) +Now that we have the OpenStack services running we'll want to setup the +different OpenStack clients. 
-## License +For convenience this project has a simple script that does it for us: + +```sh +source ~/cinder-operator/hack/dev/osp-clients-cfg.sh +``` + +We can now see available endpoints and services to confirm that the clients and +the Keystone service work as expected: + +```sh +openstack service list +openstack endpoint list +``` + +Upload a glance image: + +```sh +cd +wget http://download.cirros-cloud.net/0.5.2/cirros-0.5.2-x86_64-disk.img -O cirros.img +openstack image create cirros --container-format=bare --disk-format=qcow2 < cirros.img +openstack image list +``` + +And create a cinder volume: + +```sh +openstack volume create --size 1 myvolume +``` + +## Cleanup + +To delete the deployed OpenStack we can do: + +```sh +cd ~/install_yamls +make openstack_deploy_cleanup +``` + +Once we've done this we need to recreate the PVs that we created at the start, +since some of them will be in failed state: + +```sh +make crc_storage_cleanup crc_storage +``` + +We can now remove the openstack-operator as well: + +```sh +make openstack_cleanup +``` + +# ADDITIONAL INFORMATION + +**NOTE:** Run `make --help` for more information on all potential `make` +targets. + +More information about the Makefile can be found via the [Kubebuilder +Documentation]( https://book.kubebuilder.io/introduction.html). + +# LICENSE Copyright 2022. @@ -158,4 +248,3 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - From 1d7d99fb6496feb772be358b122b6a854ff8af2c Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Thu, 17 Nov 2022 18:03:52 +0100 Subject: [PATCH 4/7] Docs: Contributor guide This patch adds some documentations to help new contributors get started. 
It is not complete, since it doesn't explain how to work with merged and unmerged dependencies with other operators and lib-common, but it should serve as a good start. --- CONTRIBUTING.md | 194 +++++++++++++++++++++++++++++++++++ README.md | 3 + docs/dev/assumptions.md | 17 +++ docs/dev/custom-image.md | 148 ++++++++++++++++++++++++++ docs/dev/design-decisions.md | 23 +++++ docs/dev/local-registry.md | 94 +++++++++++++++++ docs/dev/local.md | 135 ++++++++++++++++++++++++ 7 files changed, 614 insertions(+) create mode 100644 CONTRIBUTING.md create mode 100644 docs/dev/assumptions.md create mode 100644 docs/dev/custom-image.md create mode 100644 docs/dev/design-decisions.md create mode 100644 docs/dev/local-registry.md create mode 100644 docs/dev/local.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..a455885e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,194 @@ +# Contributing to the cinder operator + +Thank you for taking the time to contribute! + +The following is a set of guidelines for contributing to the [cinder-operator +hosted in GitHub](https://github.com/openstack-k8s-operators/cinder-operator). +Feel free to propose changes to this document or any other in a pull request. + +## What should I know before I get started? + +The cinder-operator is not a large open source project, but it's part of the +[OpenStack podification effort](https://github.com/openstack-k8s-operators) +which is of some size. + +There are some requirements to deploy a working Cinder control plane within +OpenShift using the cinder-operator. For simplicity's sake all the +documentation will assume that the +[openstack-operator](https://github.com/openstack-k8s-operators/openstack-operator) +will be used to deploy necessary services, though this doesn't mean that there +are no other ways to do it. 
+
+Before working on your first code contribution it would be a good idea to
+complete the [getting started](README.md#getting-started) section first to get
+familiar with deploying the services and to get a feeling of the
+behavior of a working OpenStack deployment.
+
+Please refer to the [OpenStack documentation](https://docs.openstack.org) to
+learn about OpenStack itself.
+
+### Design decisions
+
+There is some [global podification
+documentation](https://github.com/openstack-k8s-operators/docs) relevant for
+all the operators, but there are also some global design decisions that have
+not been spelled out yet anywhere else, so they are included in the
+[cinder-operator design decisions document](docs/dev/design-decisions.md).
+
+## How to contribute?
+
+This project is [using pull
+requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)
+to submit changes, but it is not currently using the [issues
+section](https://github.com/openstack-k8s-operators/cinder-operator/issues) or
+any other GitHub feature to track the project's bugs or features and instead it
+is expected that whoever finds an issue will fix it or will find someone else
+to work on it.
+
+### Testing changes
+
+Developers submitting PRs are expected to have run the code and manually
+verified the functionality before submitting the PR, with the exception
+of documentation only and CI only PRs.
+
+There are multiple ways to test code changes, but in general terms they can be
+split into 2 categories: running things locally or in a container in the
+OpenShift cluster.
+
+Running things locally is considerably faster but it doesn't use the real ACLs
+(RBACs) the cinder-operator would use on a normal deployment. On the other hand
+running things in OpenShift is slower (because we have to build a new
+container) but runs as close to a real deployment as we can.
+
+Each of the approaches has its own advantages and disadvantages, so different
+variants of both approaches will be covered for readers to choose the one they
+prefer.
+
+The following list of articles assumes that podified OpenStack has been
+deployed using the openstack-operator as described in the [Getting started
+section](README.md#getting-started):
+
+- [Run operator locally](docs/dev/local.md)
+- [Run operator in OpenShift using a custom image](docs/dev/custom-image.md)
+
+### Pull Requests
+
+While the pull request flow is used for submitting new issues and for
+reviewing, the git repository should **always** be the source of truth, so all
+decisions made through the reviewing and design phases should be properly
+reflected in the final code, code comments, documentation, and git commit
+message that is merged. It should not be necessary to go to a PR in GitHub and
+go through the comments to know what a commit is meant to do or why it is doing
+it.
+
+*One-liner* commit messages should be avoided like the plague. Please refer to
+the [commit message guidelines](#commit-messages) for good practices on commit
+messages.
+
+The cinder-operator project will not squash all pull request commits into a
+single commit but will look to preserve all submitted individual commits
+instead, using a merge strategy. This means that we can have both
+single-commit and multi-commit PRs, and both have their places. It's all
+about how and when to split changes.
+
+#### Structural split of changes
+
+The general rule for how to split code changes into commits is that we should
+aim to have a single "logical change" per commit.
+
+As the [OpenStack Git Commit Good
+Practice](https://wiki.openstack.org/wiki/GitCommitMessages) explains, there
+are multiple reasons why this rule is important:
+
+- The smaller the amount of code being changed in a commit, the quicker &
+  easier it is to review & identify potential flaws in that specific code
+  change.
+- If a change is found to be flawed later, it may be necessary to revert the
+  broken commit. This is much easier to do if there are not other unrelated
+  code changes entangled with the original commit.
+- When troubleshooting problems using Git's bisect capability, small well
+  defined changes will aid in isolating exactly where the code problem was
+  introduced.
+- When browsing history using Git annotate/blame, small well defined changes
+  also aid in isolating exactly where & why a piece of code came from.
+
+In the [OpenStack Git Commit Good Practice
+page](https://wiki.openstack.org/wiki/GitCommitMessages) we can also find two
+great sections explaining [things to avoid when creating
+commits](https://wiki.openstack.org/wiki/GitCommitMessages#Things_to_avoid_when_creating_commits)
+and [examples of bad
+practice](https://wiki.openstack.org/wiki/GitCommitMessages#Examples_of_bad_practice)
+that contributors to this repository should take into consideration.
+
+#### PR Images
+
+Once a PR merges it will trigger an image rebuild and publish, so how can we
+tell when the new image is ready?
+
+For that we'll go to the [project's
+actions](https://github.com/openstack-k8s-operators/cinder-operator/actions)
+and in there [we select the `Cinder Operator image builder`
+job](https://github.com/openstack-k8s-operators/cinder-operator/actions/workflows/build-cinder-operator.yaml)
+where we'll see the job that is building the new operator container image.
+
+In there we can see how the job is building 3 images: Operator, Bundle, and Index, and if
+you go into each one of them and expand the `Push **** To quay.io` step you can
+find the actual image location.
+ +For example: + +``` +Successfully pushed "cinder-operator-index:9f3d1ec26ba8939710f146f3e6a1d81f5077be8a" to "quay.io/***/cinder-operator-index:9f3d1ec26ba8939710f146f3e6a1d81f5077be8a" +``` + +## Style Guides + +### Go Style Guide + +While the project has not formally defined a Go Style Guide for the project it +uses [Gofmt](https://pkg.go.dev/cmd/gofmt) for automated code formatting. + +This tool is automatically called when building the cinder-operator binary +using the `Makefile` or when it is manually invoked with: + +```sh +make fmt +``` + +Pull Requests are expected to have passed the formatting tool before committing +the code and submitting the PR. + +### Commit Messages + +As mentioned before, commit messages are a very important part of a pull +request, and their contents must be carefully crafted. + +Commit messages should: + +- Provide a brief description of the change in the first line. +- Insert a single blank line after the first line. +- Provide a detailed description of the change in the following lines, breaking + paragraphs where needed. +- Lines should be wrapped at 72 characters. + +Once again the OpenStack documentation goes over the [important things to +consider when writing the commit +message](https://wiki.openstack.org/wiki/GitCommitMessages#Information_in_commit_messages), +so please take a good look. The short version is: + +- Do not assume the reviewer understands what the original problem was. +- Do not assume the reviewer has access to external web services/site. +- Do not assume the code is self-evident/self-documenting. +- Describe why a change is being made. +- Read the commit message to see if it hints at improved code structure. +- The first commit line is the most important. +- Describe any limitations of the current code. +- Do not assume the reviewer has knowledge of the tests executed on the change +- Do not include patch set-specific comments. 
+ +## Helpful scripts + +Under the `hack` directory we'll find scripts that can help us in the +development of the cinder-operator as well as some tips and tricks. + +Refer to its `README.md` file for additional information. diff --git a/README.md b/README.md index 4f674e97..a3ec4783 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,9 @@ targets. More information about the Makefile can be found via the [Kubebuilder Documentation]( https://book.kubebuilder.io/introduction.html). +For developer specific documentation please refer to the [Contributing +Guide](CONTRIBUTING.md). + # LICENSE Copyright 2022. diff --git a/docs/dev/assumptions.md b/docs/dev/assumptions.md new file mode 100644 index 00000000..49a838a2 --- /dev/null +++ b/docs/dev/assumptions.md @@ -0,0 +1,17 @@ +# Assumptions + +The different articles describing how to run custom cinder-operator code make +some assumptions, please adjust the steps in those cases where your system +doesn't match them: + +- You have an OpenShift cluster running and you have logged in it with `oc`, so + there are valid credentials in the system. + +- OpenStack operator repositories that are referenced are located directly in a + directory within the home directory following the GitHub repository name. In + the cinder-operator case this will be `~/cinder-operator`. + +- The `install_yamls` repository is in `~/install_yamls`. + +- Local repositories will have 2 remotes defined `origin` for our fork and + `upstream` for the `openstack-k8s-operators` repository. diff --git a/docs/dev/custom-image.md b/docs/dev/custom-image.md new file mode 100644 index 00000000..2e70bb20 --- /dev/null +++ b/docs/dev/custom-image.md @@ -0,0 +1,148 @@ +# Running the operator in OpenShift + +**NOTE**: This article [makes some assumptions](assumptions.md), make sure they +are correct or adapt the steps accordingly. 
+
+This development model is the closest to the *real thing* because the
+cinder-operator will be running in OpenShift in a pod and using the RBACs we
+have defined, but since we have to build the container and upload it to a
+registry, and then OpenShift needs to download it, it will be considerably
+slower than just running [the operator locally](local.md).
+
+Before we go and build the container image we should decide what container
+image registry we want to use, because we can use a public registry, such as
+quay.io, a private registry, or even run a *toy* registry locally.
+
+Running a *toy* registry may require running a couple more commands the first
+time, but it will prove to be much faster, since image pushing and pulling will
+not go through your internet connection.
+
+To make things easier we [have a simplified explanation of how to run a local
+toy registry](local-registry.md) in the IP address (`192.168.130.1`) that CRC
+assigns the host when deploying the VM.
+
+The rest of the document assumes the *toy* registry is being used.
+
+### Preparation
+
+This article assumes we have followed the [Getting
+Started](../../README.md#getting-started) section successfully so we'll not
+only have a cinder-operator pod running, but also the different cinder
+services.
+
+Unlike when we are [running the operator locally](local.md) here we don't need
+to manually stop the existing operator, we can leave it running. And we won't
+stop the Cinder services either to illustrate another way of doing things,
+though we could do it here if we want, just like we did when [running the
+operator locally](local.md#preparation).
+
+### Image build
+
+Before continuing make sure you are in the `cinder-operator` directory where
+the changes to test are.
+ +Now we will build the container image for the cinder-operator: + +```sh +export IMG=192.168.130.1:5000/cinder-operator:latest + +make docker-build +``` + +This command takes a while to execute, but once it completes we'll have the new +cinder-operator container image in our local registry (don't mistake this for +the *toy* registry) and we can confirm the presence of the image with `podman +images`. + +Now it's time to push it to our *toy* registry: + +```sh +export VERIFY_TLS=false + +make docker-push +``` + +At this point OpenShift will be able to pull the new image container from this +*toy* registry instead of having to access a registry from Internet. + +### Run + +Before we try to run the new operator we may have to generate and install the +new CRDs and RBACs if they have changed. This can be easily done by running +`make install`. + +As mentioned before we haven't stopped the cinder-operator that is running in +OpenShift, so what we are going to do is replace the running operator to make +it use the one we just built. + +We start by searching for the name of the `ClusterServiceVersion` of the +OpenStack operator and editing its current CR: + +```sh +CSV=`oc get -l operators.coreos.com/openstack-operator.openstack= -o custom-columns=CSV:.metadata.name --no-headers csv` + +oc edit csv $CSV +``` + +Now we search for the first instance of `name: +cinder-operator-controller-manager`, and within its `spec.containers` sections +look for the `image:` definition where the cinder-container image location is +defined. + +Now we change the location to `192.168.130.1:5000/cinder-operator:latest`, save +and exit the editor. + +At this point OpenShift should detect the change and try to reconcile the CSV, +this will terminate the existing cinder-operator pod that is running and start +a new one with the new image. We can confirm it using `oc describe pod` with +the name of the new cinder-operator pod. 
+
+**NOTE**: If the new image is not used we may need to force a faster response by
+changing the cinder-operator replicas to 0 in the CSV, save and exit, wait
+until the pod is terminated and then change it back to 1.
+
+Since we didn't remove the Cinder services that were running, once the new
+cinder-operator is running it should reconcile the existing `Cinder` CR,
+modifying existing `StatefulSet` and `Deployment` manifests used for the Cinder
+services according to the new code.
+
+We may also want to uninstall all cinder services and see how our newly
+deployed operator deploys them from scratch. To do that we'll need to edit the
+`OpenStackControlPlane` CR that was used to deploy OpenStack and currently
+exists in the OpenShift cluster.
+
+```sh
+oc edit OpenStackControlPlane openstack
+```
+
+Now we search for the `cinder` section and in its `template` section we change
+the `replicas` value to `0` for the 4 services, then save and exit the editor.
+
+This will make the openstack-operator notice the change and modify the `Cinder`
+CR, which in turn will be detected by the cinder-operator triggering the
+termination of the cinder services in order during the reconciliation.
+
+Once the cinder service pods are gone we can set the `replicas` back to `1` in
+the `cinder` section of the `OpenStackControlPlane` CR to trigger the
+deployment of the Cinder services. The easiest way to do so is to apply the
+original manifest we used to deploy OpenStack in the first place:
+
+```sh
+oc apply -f hack/dev/openstack.yaml
+```
+
+**NOTE**: The Cinder DB is not deleted when uninstalling Cinder services, so
+Cinder DB migrations will run faster this time (they won't do anything).
+
+To see what the cinder-operator is doing we'll need to do `oc logs -f` for the
+cinder-operator.
+
+### Final notes
+
+If we need to make changes to the operator we'll need to go through the `make
+install` and `make docker-build docker-push` cycle again to create a new image.
+ +Making OpenShift use the new image we just built should be easy enough, we just +need to delete the cinder-operator pod, since the `imagePullPolicy` policy of +the pod is `Always`, so it checks that the source image is up to date every +time it runs the pod. diff --git a/docs/dev/design-decisions.md b/docs/dev/design-decisions.md new file mode 100644 index 00000000..626eea3d --- /dev/null +++ b/docs/dev/design-decisions.md @@ -0,0 +1,23 @@ +# Design decisions + +We have agreed on a very basic set of principles we would like to follow during +the development of the OpenStack Operators: + +- OpenShift is the only intended Container Orchestration system we aim to + support, and code can depend on OpenShift only available features. + +- Should use configuration snippets for the system administrators (or the + meta-operator) to provide the service specific configuration. This will + reduce the domain specific knowledge required, making it easy for anyone that + knows how to configure the specific service to use the operator. + +- Should aim to follow the Kubernetes [Operator pattern]( + https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). + +- Must use [Controllers]( + https://kubernetes.io/docs/concepts/architecture/controller/) which provide + a reconcile function responsible for synchronizing resources until the + desired state is reached on the cluster. + +- Intended to be deployed via OLM [Operator Lifecycle Manager]( + https://github.com/operator-framework/operator-lifecycle-manager). diff --git a/docs/dev/local-registry.md b/docs/dev/local-registry.md new file mode 100644 index 00000000..0a2f37a4 --- /dev/null +++ b/docs/dev/local-registry.md @@ -0,0 +1,94 @@ +# Using a local registry + +During the development process there will be times when we may want to build +and test container images that we are manually building, for example with `make +docker-build`. 
+
+To use these container images we'll need to have them available in a registry
+that is accessible from our OpenShift cluster.
+
+One possibility is using our personal account in a public registry such as
+quay.io, but this forces us to have access to Internet and will be slower due
+to the pushing and pulling of images to the registry.
+
+Another possibility is running a local *toy* registry, which may be a bit more
+cumbersome the first time, but will make development with container images
+faster.
+
+We can easily deploy a registry in the host computer from where we'll be
+running the CRC VM with:
+
+```sh
+podman run -d -p 5000:5000 --name registry registry:2
+```
+
+With this we'll be able to push container images to that local registry using
+any of the host IP addresses and port `5000`.
+
+Since this is a *toy* registry it doesn't use HTTPS but HTTP instead, so there
+are some considerations to be taken into account.
+
+## Allowing the registry
+
+This insecure registry must be allowed in our OpenShift deployment.
+
+We do this by changing the `/etc/containers/registries.conf` file in our
+OpenShift nodes.
+
+When using CRC we can easily do this if we leverage our helper functions.
+
+First we import our helper functions.
+
+```sh
+source ~/cinder-operator/hack/dev/helpers.sh
+```
+
+Now we allow the insecure registry using the IP address that CRC creates on the
+host during deployment (`192.168.130.1`) and then restart the `crio` engine
+and the `kubelet` service:
+
+```sh
+crc_ssh "echo -e '\n[[registry]]\nlocation = \"192.168.130.1:5000\"\ninsecure = true' | sudo tee -a /etc/containers/registries.conf"
+
+crc_ssh sudo systemctl restart crio kubelet
+```
+
+Now OpenShift will be able to pull images from our local HTTP registry without
+complaint.
+
+## Pushing images
+
+Pushing containers to the registry will fail because the client won't be able
+to do the TLS verification, so we need to disable it.
+
+To disable TLS verification we must pass the `--tls-verify=false` argument on
+the `push` call.
+
+For example:
+
+```sh
+podman push --tls-verify=false 192.168.130.1:5000/cinder-operator:latest
+```
+
+In Cinder and some other `Makefile`s this can be achieved using the environment
+variable `VERIFY_TLS`:
+
+```sh
+export VERIFY_TLS=false
+make docker-push
+```
+
+## OPM
+
+When using the `opm` CLI we must let it know that we'll be using HTTP, which
+can be done with the `--use-http` argument.
+
+There are also some projects' `Makefile` files, like Cinder's, that allow us to
+use the environment variable `USE_HTTP` to automatically pass the right
+parameter to `opm`.
+
+```sh
+export USE_HTTP=true
+
+make catalog-build
+```
diff --git a/docs/dev/local.md b/docs/dev/local.md
new file mode 100644
index 00000000..cabce85f
--- /dev/null
+++ b/docs/dev/local.md
@@ -0,0 +1,135 @@
+# Running the operator locally
+
+**NOTE**: This article [makes some assumptions](assumptions.md), make sure they
+are correct or adapt the steps accordingly.
+
+This development model is useful for quick iterations of the operator code
+where one can easily use debugging tools and change template and asset files on
+the fly without the need for deployments.
+
+We will build and run the operator on the host machine that is running the
+OpenShift VM and it will connect to the OpenShift cluster from the outside
+using our credentials.
+
+The downside of this approach is that we are not running things in a container
+inside OpenShift, so there could be differences between what is accessible in
+one case and the other. For example, we'll have admin credentials running
+things on the host, whereas the operator deployed inside OpenShift will have
+more restrictive ACLs.
+
+Another downside is that we'll have to manually log in to the cluster every
+time our login credentials expire.
+
+This process can be used for quick development and once we are ready we can
+move to [building custom images and running them in OpenShift](custom-image.md)
+where things are run as they normally would.
+
+### Preparation
+
+This article assumes we have followed the [Getting
+Started](../../README.md#getting-started) section successfully so we'll not
+only have a cinder-operator pod running, but also the different cinder
+services.
+
+Since we have everything running we need to uninstall both the
+cinder-operator and Cinder services.
+
+To uninstall the Cinder services we will edit the `OpenStackControlPlane` CR
+that we used to deploy OpenStack and is present in the OpenShift cluster.
+
+```sh
+oc edit OpenStackControlPlane openstack
+```
+
+Now we search for the `cinder` section and in its `template` section we change
+the `replicas` value to `0` for the 4 services, then save and exit the editor.
+
+This will make the openstack-operator notice the change and modify the `Cinder`
+CR, which in turn will be detected by the cinder-operator triggering the
+termination of the cinder services in order during the reconciliation.
+
+**NOTE**: The Cinder DB is not deleted when uninstalling Cinder services, so
+Cinder DB migrations will run faster on the next deploy (they won't do
+anything) and volume, snapshot, and backup records will not be lost.
+
+Once we no longer have any of the cinder service pods (`oc get pod -l
+service=cinder` returns no pods) we can proceed to remove the cinder-operator
+pod that is currently running on OpenShift so it doesn't conflict with the one
+we'll be running locally.
+ +We search for the name of the `ClusterServiceVersion` of the OpenStack operator +and edit its current CR: + +```sh +CSV=`oc get -l operators.coreos.com/openstack-operator.openstack= -o custom-columns=CSV:.metadata.name --no-headers csv` + +oc edit csv $CSV +``` + +This will drop us in our editor with the contents of CSV YAML manifest where +we'll search for the first instance of `name: +cinder-operator-controller-manager`, and we should see something like: + +``` + - label: + control-plane: controller-manager + name: cinder-operator-controller-manager + spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager +``` + +Where we see `replicas: 1` change it to `replicas: 0`, save and exit. This +triggers the termination of the cinder-operator pod. + +### Build and Run + +Before continuing make sure you are in the `cinder-operator` directory where +the changes to test are. + +If our local code is changing the cinder-operator CRDs or adding new ones we +need to regenerate the manifests and change them in OpenShift. This can be +easily done by running `make install`, which first builds the CRDs (using the +`manifests` target) and then installs them in the OpenShift cluster. + +Now it's time to build the cinder operator (we'll need go version 1.18) and run +it (remember you need to be logged in with `oc login` or `crc_login` if you are +using the helper functions): + +```sh +make build +OPERATOR_TEMPLATES=$PWD/templates ./bin/manager +``` + +We can also do it with a single make call: `OPERATOR_TEMPLATES=$PWD/templates +make install run` + +Any changes in the `templates` directory will be automatically available to the +operator and there will be no need to recompile, rebuild, or restart the +cinder-operator. + +Now that the cinder operator is running locally we can go back and set the +`replicas` back to `1` in the `cinder` section of the `OpenStackControlPlane` +CR to trigger the deployment of the Cinder services. 
The easiest way to do so +is to apply the original manifest we used to deploy OpenStack in the first +place: + +```sh +oc apply -f hack/dev/openstack.yaml +``` + +We should now see the local cinder-operator detecting the change and we'll be +able to see validate our code changes. + +### Final notes + +If there's something wrong we can stop the operator with Ctrl+C and repeat the +process: Run `make install` if there are changes to the CRDs and rebuild and +rerun the cinder-operator. + +Remember that this workflow doesn't take into account the cinder-operator's +RBACs so even if things work here they could fail during a real deployment. It +would be prudent to [run the new code in OpenShift](custom-image.md) to be sure +that there will be no ACL issues. From 78f0212bf13d2d125d521d492abade1ce8cbafa7 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Mon, 21 Nov 2022 15:34:39 +0100 Subject: [PATCH 5/7] Docs: External dependencies This patch introduces a couple of guides on how to do dependencies in the cinder-operator to explain simple and circular references for both running the operator locally and building the operator. 
--- CONTRIBUTING.md | 39 ++++ docs/dev/custom-image-dependencies.md | 92 +++++++++ docs/dev/local-dependencies.md | 286 ++++++++++++++++++++++++++ 3 files changed, 417 insertions(+) create mode 100644 docs/dev/custom-image-dependencies.md create mode 100644 docs/dev/local-dependencies.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a455885e..e114fc06 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,6 +70,13 @@ section](README.md#getting-started): - [Run operator locally](docs/dev/local.md) - [Run operator in OpenShift using a custom image](docs/dev/custom-image.md) +- [Running locally with external dependencies](docs/dev/local-dependencies.md) +- [Running in OpenShift with external + dependencies](docs/dev/custom-image-dependencies.md) + +There is a script called `hack/checkout_pr.sh` that is helpful when we want to +test an existing PR that has dependencies. Check the [Testing PR section in the +hack documentation](hack/README.md#testing-prs) for additional information. ### Pull Requests @@ -91,6 +98,38 @@ instead using a merge strategy instead. This means that we can have both single commit and as multi-commit PRs, and both have their places. It's all about how and when to split changes. +#### Dependency Management + +When submitting a PR that has dependencies in other repositories these +dependencies should be stated in the PR or the commits using the `Depends-On:` +tag. + +There are 4 ways to state a dependency with another projects PR: +- `Depends-On: lib-common=88` +- `Depends-On: lib-common#88` +- `Depends-On: openstack-k8s-operators/lib-common#88` +- `Depends-On: https://github.com/openstack-k8s-operators/lib-common/88` + +Multiple `Depends-On:` tags are supported. + +For the time being these tags are only useful when using the +`hack/checkout_pr.sh` or `hack/showdeps.py` scripts. + +A good example of using these tags is the `extraVol` series of PRs. 
There are +PRs in 4 projects: lib-common, cinder-operator, glance-operator, and the +openstack-operator. + +The operators all require the lib-common PR, but then there are 2 circular +requirements. One is between the cinder-operator and the openstack-operator, +and the other is between the glance-operator and the openstack-operator. + +These are the PRs for reference: + +- https://github.com/openstack-k8s-operators/lib-common/pull/88 +- https://github.com/openstack-k8s-operators/cinder-operator/pull/65 +- https://github.com/openstack-k8s-operators/glance-operator/pull/75 +- https://github.com/openstack-k8s-operators/openstack-operator/pull/38 + #### Structural split of changes The general rule for how to split code changes into commits is that we should diff --git a/docs/dev/custom-image-dependencies.md b/docs/dev/custom-image-dependencies.md new file mode 100644 index 00000000..0db0129e --- /dev/null +++ b/docs/dev/custom-image-dependencies.md @@ -0,0 +1,92 @@ +# Running the operator in OpenShift with external dependencies + +**NOTE**: This article [makes some assumptions](assumptions.md), make sure they +are correct or adapt the steps accordingly. + +If you've already [run the operator in OpenShift](custom-image.md) before, +you'll be familiar with the process of running custom cinder-operator code +by building and deploying a custom image. + +Let's see what we have to do to run our operator in OpenShift when we need to +compile and build containers with external dependencies. + +This short guide builds on the knowledge provided in [guide to running the +operator in OpenShift](custom-image.md) to explain external dependencies usage. + +The steps will be the same, the only difference is that some additional steps +are necessary between the [Preparation](custom-image.md#preparation) and the +[Image build](custom-image.md#image-build) steps to indicate the new +references. 
+ +To avoid repeating things again, this text assumes familiarity with the + concepts presented in the [running locally with external +dependencies](local-dependencies.md) article. + +All the cases described in running the operator locally with external +dependencies where the dependency was expressed as github repositories will +work fine when building the containers, so we don't need to do anything +special. + +The difference comes when using local paths to reference the external +dependencies, because those are outside of the root directory of the +`Dockerfile` and therefore won't be present in the building container. + +The solution is to use bind mounts. + +### Unmerged local simple reference + +Assuming the same example as running things locally we would do: + +``` +cd ~/cinder-operator +mkdir -p tmp/lib-common +sudo mount --bind `realpath ../lib-common` `realpath tmp/lib-common` + +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/common=tmp/lib-common/modules/common +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/database=tmp/lib-common/modules/database +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/storage=tmp/lib-common/modules/storage +``` + +Alternatively we can replace the last 3 commands with a single command: + +``` +hack/setdeps.py lib-common=tmp/lib-common +``` + +Then we can just continue with [building the +image](custom-image.md#image-build) like we would normally do with the exception +of having to add `GOWORK=` as well: + +``` +GOWORK= make docker-build +``` + +### Circular references + +In the circular references case when using local paths it's a bit more +complicated than the local run case, because the bind mount created from the +cinder-operator to the openstack-operator is not preserved when we bind mount +the cinder-operator from the openstack-operator, so we need to link it again. 
+ +We'll assume we have the dependency between cinder-operator and +openstack-operator and that our local repositories have the code we want to +use. + +``` +cd ~/cinder-operator +mkdir -p tmp/openstack-operator +sudo mount --bind `realpath ../openstack-operator` `realpath tmp/openstack-operator` + +go work edit -replace github.com/openstack-k8s-operators/openstack-operator/apis=tmp/openstack-operator + + +cd ~/openstack-operator +mkdir -p tmp/cinder-operator +sudo mount --bind `realpath ../cinder-operator` `realpath tmp/cinder-operator` +sudo mount --bind `realpath ./` `realpath tmp/cinder-operator/tmp/openstack-operator` + +go work edit -replace github.com/openstack-k8s-operators/cinder-operator/api=tmp/cinder-operator/api +``` + +Now we can continue with the [Image build steps](custom-image.md#image-build), +again adding `GOWORK=` to the `docker-build`. diff --git a/docs/dev/local-dependencies.md b/docs/dev/local-dependencies.md new file mode 100644 index 00000000..da8d282b --- /dev/null +++ b/docs/dev/local-dependencies.md @@ -0,0 +1,286 @@ +# Running the operator locally with external dependencies + +**NOTE**: This article [makes some assumptions](assumptions.md), make sure they +are correct or adapt the steps accordingly. + +If you've already [run the operator locally](local.md) before, you'll be +familiar with the process of removing the cinder podified services and the +cinder-operator pod from the OpenShift cluster, building the operator and +running it locally. + +It's very likely that during the development of the cinder-operator you'll find +yourself needing to use a newer version of a dependency or even needing to use +an unmerged code of a dependency. + +This short guide builds on the knowledge provided in [guide to locally run the +operator](local.md) to explain those external dependencies usages. 
+ +The steps will be the same, the only difference is that some additional steps +are necessary between the [Preparation](local.md#preparation) and the [Build +and Run](local.md#build-and-run) steps to indicate the new references. + +There are slight differences between the possible cases, lib-common dependency +update, circular reference with the openstack-operator, the external dependency +code being merged or not, having the external dependency's code locally or not, +etc., so to facilitate its understanding we'll provide separate explanations +for most of them. + +### Merged simple reference + +If we want to compile the cinder-operator with newer lib-common code that has +already merged in its repository but don't want to wait for dependabot to + propose an update to the project's dependencies and for it to get merged we +can just run the following commands from the `cinder-operator` repository +before [Building and Running the operator](local.md#build-and-run): + +``` +go get github.com/openstack-k8s-operators/lib-common/modules/common +go get github.com/openstack-k8s-operators/lib-common/modules/database +go get github.com/openstack-k8s-operators/lib-common/modules/storage +``` + +**NOTE**: Using the ``go get`` command ensures that other `go.mod` adjustments +are made as needed to satisfy constraints imposed by other modules. 
+ +After running these commands we can see that the `go.mod` and `go.sum` have +been updated: + +``` +$ git diff go.* + +diff --git a/go.mod b/go.mod +index 599f056..8e7d423 100644 +--- a/go.mod ++++ b/go.mod +@@ -9,9 +9,9 @@ require ( + github.com/openshift/api v3.9.0+incompatible + github.com/openstack-k8s-operators/cinder-operator/api v0.0.0-20221010180347-a9a8efadf3c3 + github.com/openstack-k8s-operators/keystone-operator/api v0.0.0-20220927090553-6b3218c776f7 +- github.com/openstack-k8s-operators/lib-common/modules/common v0.0.0-20221103175706-2c39582ce513 +- github.com/openstack-k8s-operators/lib-common/modules/database v0.0.0-20220923094431-9fca0c85a9dc +- github.com/openstack-k8s-operators/lib-common/modules/storage v0.0.0-20220923094431-9fca0c85a9dc ++ github.com/openstack-k8s-operators/lib-common/modules/common v0.0.0-20221117092428-c1190ea3bf3d ++ github.com/openstack-k8s-operators/lib-common/modules/database v0.0.0-20221117092428-c1190ea3bf3d ++ github.com/openstack-k8s-operators/lib-common/modules/storage v0.0.0-20221117092428-c1190ea3bf3d + github.com/openstack-k8s-operators/mariadb-operator/api v0.0.0-20221014164348-0a612ae8b391 + github.com/openstack-k8s-operators/openstack-operator/apis v0.0.0-20221107090218-8d63dba1ec13 + k8s.io/api v0.25.4 +diff --git a/go.sum b/go.sum +index c19f296..2130ed6 100644 +--- a/go.sum ++++ b/go.sum +@@ -323,12 +323,18 @@ github.com/openstack-k8s-operators/keystone-operator/api v0.0.0-20220927090553-6 + github.com/openstack-k8s-operators/keystone-operator/api v0.0.0-20220927090553-6b3218c776f7/go.mod h1:q/owiyXlI2W4uQR4TeHPeeN75AGDfyZgQdNHeKUYN68= + github.com/openstack-k8s-operators/lib-common/modules/common v0.0.0-20221103175706-2c39582ce513 h1:PSXOLFTskoG9R/YR4Pg5AOJYS3CEnFbZ2yVdrk9xOE4= + github.com/openstack-k8s-operators/lib-common/modules/common v0.0.0-20221103175706-2c39582ce513/go.mod h1:KWqK7l2ej+rIYngoNUrxE2YjKGlRAAgJXXM0uU2R6XY= ++github.com/openstack-k8s-operators/lib-common/modules/common 
v0.0.0-20221117092428-c1190ea3bf3d h1:/1FTHxBQJo4xM0GmJCX5wPCYmyLWTw1uHQKCydGH3mY= ++github.com/openstack-k8s-operators/lib-common/modules/common v0.0.0-20221117092428-c1190ea3bf3d/go.mod h1:KWqK7l2ej+rIYngoNUrxE2YjKGlRAAgJXXM0uU2R6XY= + github.com/openstack-k8s-operators/lib-common/modules/database v0.0.0-20220923094431-9fca0c85a9dc h1:87lUVT3MLRI4Vg0nHpupwPKXtykGX3hZzPl5k6Kcyng= + github.com/openstack-k8s-operators/lib-common/modules/database v0.0.0-20220923094431-9fca0c85a9dc/go.mod h1:umGUqQO4JtgefAaIwZjP+TxfxsLMEEeK/6VNzk8ooaI= ++github.com/openstack-k8s-operators/lib-common/modules/database v0.0.0-20221117092428-c1190ea3bf3d h1:lO5WmV9RjVAIxbr1HPjvqVy6niatdEPG+FyEbL4FMpc= ++github.com/openstack-k8s-operators/lib-common/modules/database v0.0.0-20221117092428-c1190ea3bf3d/go.mod h1:umGUqQO4JtgefAaIwZjP+TxfxsLMEEeK/6VNzk8ooaI= + github.com/openstack-k8s-operators/lib-common/modules/openstack v0.0.0-20220915080953-f73a201a1da6 h1:MVNEHyqD0ZdO9jiyUSKw5M2T9Lc4l4Wx1pdC2/BSJ5Y= + github.com/openstack-k8s-operators/lib-common/modules/openstack v0.0.0-20220915080953-f73a201a1da6/go.mod h1:YsqouRH8DoZAjFaxcIErspk59BcwXtVjPxK/yV17Wrc= + github.com/openstack-k8s-operators/lib-common/modules/storage v0.0.0-20220923094431-9fca0c85a9dc h1:Dud2dr25VhaZF9Av28nqmCeBfNkGWDckZ5TaajEcGFc= + github.com/openstack-k8s-operators/lib-common/modules/storage v0.0.0-20220923094431-9fca0c85a9dc/go.mod h1:fhM62I45VF/5WVpOP1h9OpTfFn+lF2XGrT5jUBKEHVc= ++github.com/openstack-k8s-operators/lib-common/modules/storage v0.0.0-20221117092428-c1190ea3bf3d h1:b2dqfShyX4NO/NMe3rTK1xWRF91ITobjQEfO6ftO6yM= ++github.com/openstack-k8s-operators/lib-common/modules/storage v0.0.0-20221117092428-c1190ea3bf3d/go.mod h1:fhM62I45VF/5WVpOP1h9OpTfFn+lF2XGrT5jUBKEHVc= + github.com/openstack-k8s-operators/mariadb-operator/api v0.0.0-20221014164348-0a612ae8b391 h1:Bd1e4CG/0gQbRoSH1EJLS1tin9XUjPR2s1e+dpBHiUs= + github.com/openstack-k8s-operators/mariadb-operator/api v0.0.0-20221014164348-0a612ae8b391/go.mod 
h1:HiEKXmDSJ6Gl+pN7kK5CX1sgOjrxybux4Ob5pdUim1M= + github.com/openstack-k8s-operators/openstack-operator/apis v0.0.0-20221107090218-8d63dba1ec13 h1:GkYSRpfdDav5HipJ2oIYqpY8crLaDVRBmhqlUztDTV4= +``` + +Now we can proceed as usual with the [Building and +Running](local.md#build-and-run) steps. + +Once we have confirmed that our code changes work as expected with the new +dependency version we can submit our PR as we normally do but including the +`go.mod` and `go.sum` changes. + +If we want to use a specific tag instead of `HEAD` we can provide it in our +calls: + +``` +go get github.com/openstack-k8s-operators/lib-common/modules/common@v0.1.0 +go get github.com/openstack-k8s-operators/lib-common/modules/database@v0.1.0 +go get github.com/openstack-k8s-operators/lib-common/modules/storage@v0.1.0 +``` + +### Unmerged local simple reference + +There will be times when we'll be working on lib-common code in our local +repository while using it in the cinder-operator and we want to test this. + +In this case we cannot use the `go get` because our lib-common module is not + within the cinder-operator command so we'll use the `go work edit` command +instead: + +``` +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/common=../lib-common/modules/common +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/database=../lib-common/modules/database +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/storage=../lib-common/modules/storage +``` + +Alternatively we can use the `hack/setdeps.py` script to do the same thing: + +``` +hack/setdeps.py lib-common=../lib-common +``` + +Now we can proceed as usual with the [Building and +Running](local.md#build-and-run) steps. 
If we use `make build` then we'll need
+to let it know that it should use our workspace changes:

+```
+GOWORK= make build
+```
+
+Once we have confirmed that our code changes work as expected with the new
+dependency version we will need to submit the lib-common PR first and then the
+one in cinder-operator with the new dependency, as explained in the previous
+section.
+
+### Simple reference in PR
+
+Somebody may have submitted a PR to lib-common and we want to start writing
+code in the cinder-operator before that PR gets merged.
+
+Here we can either download the PR to our own local repository or just
+reference the one from GitHub.
+
+As an example, let's look at the `extraVolumes` effort and the [lib-common
+PR#88](https://github.com/openstack-k8s-operators/lib-common/pull/88).
+
+To reference it directly we'll go to GitHub's website and look for the source
+of the PR, which in this case is [fmount's extra_volumes branch](
+https://github.com/fmount/lib-common/tree/extra_volumes).
+ +So now we replace our lib-common with that one: + +``` +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/common=github.com/fmount/lib-common/modules/common@extra_volumes +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/database=github.com/fmount/lib-common/modules/database@extra_volumes +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/storage=github.com/fmount/lib-common/modules/storage@extra_volumes + +go mod tidy +``` + +Alternatively we can use the `hack/setdeps.py` to do all that for us: + +``` +hack/setdeps.py lib-common=88 +``` + +Or just the last part: + +``` +go work edit -replace lib-common=fmount/lib-common +``` + +If we have a local repository and we may want to make changes to the lib-common +code ourselves (in case we find some issue) we can pull the PR and then +reference it: + +``` +cd ../lib-common + +git fetch upstream pull/88/head:extra_volumes +git checkout extra_volumes + +cd ../cinder-operator + +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/common=../lib-common/modules/common +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/database=../lib-common/modules/database +go work edit -replace github.com/openstack-k8s-operators/lib-common/modules/storage=../lib-common/modules/storage +``` + +Alternatively we can replace the last 3 commands with a single command: + +``` +hack/setdeps.py lib-common=../lib-common +``` + +Now we can proceed as usual with the [Building and +Running](local.md#build-and-run) steps. 
If we use `make build` then we'll need
+to let it know that it should use our workspace changes:

+```
+GOWORK= make build
+```
+
+### Circular references
+
+There are times when we have circular dependencies, where the cinder-operator
+needs a new structure defined in the openstack-operator but at the same time
+the openstack-operator uses the cinder-operator structures to define the
+`OpenStackControlPlane`.
+
+An example of this happening is the work on the `TransportURL` that was
+introduced in [openstack-operator
+PR#27](https://github.com/openstack-k8s-operators/openstack-operator/pull/27)
+and used in the [cinder-operator
+PR#62](https://github.com/openstack-k8s-operators/cinder-operator/pull/62).
+
+The solution of how to replace the dependencies is pretty much the same as the
+one we've seen in the previous section, the only difference is that we need to
+do it in both repositories.
+
+We see in GitHub that the cinder-operator PR source branch is
+`use_transport_url_crd` from `abays` and the openstack-operator PR source
+branch is `rabbitmq_transporturl` from `dprince`, so we replace their
+dependencies:
+
+```
+cd ~/cinder-operator
+go work edit -replace github.com/openstack-k8s-operators/openstack-operator/apis=github.com/dprince/openstack-operator@rabbitmq_transporturl
+
+cd ~/openstack-operator
+go work edit -replace github.com/openstack-k8s-operators/cinder-operator/api=github.com/abays/cinder-operator@use_transport_url_crd
+```
+
+**NOTE**: If we are using local directory references we should use absolute
+paths instead of relative ones like we did before.
+
+In this case since we have also modified the openstack-operator we'll have to
+stop it in our OpenShift cluster and run it locally before we can proceed with
+[building and running the cinder-operator](local.md#build-and-run). Remember
+to add the `GOWORK=` to the `make build` call.
+ +```sh +CSV=`oc get -l operators.coreos.com/openstack-operator.openstack= -o custom-columns=CSV:.metadata.name --no-headers csv` + +oc edit csv $CSV +``` + +This will drop us in our editor with the contents of CSV YAML manifest where +we'll search for the first instance of `name: +openstack-operator-controller-manager`, and we should see something like: + +``` + - label: + control-plane: controller-manager + name: openstack-operator-controller-manager + spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager +``` + +Where we see `replicas: 1` change it to `replicas: 0`, save and exit. This +triggers the termination of the openstack-operator pod. + +Now we just build and run our local openstack-operator: + +``` +cd ~/openstack-operator +GOWORK= make run +``` + +Now we can continue, in another terminal, [running the cinder-operator +locally](local.md#build-and-run) but this time we cannot use `make run`, +because it will fail due to ports :8080 and :8081 already being in use: + +``` +bin/manager -metrics-bind-address=:8082 -health-probe-bind-address=:8083 +``` From cde3567a03f9c80b734e8883cbd2fd4424c3b89b Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Tue, 22 Nov 2022 14:03:07 +0100 Subject: [PATCH 6/7] Docs: Add debug guide This patch introduces a simple debug guide to help new developers. --- CONTRIBUTING.md | 8 + docs/dev/debug.md | 437 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 445 insertions(+) create mode 100644 docs/dev/debug.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e114fc06..fdf12a1f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -78,6 +78,14 @@ There is a script called `hack/checkout_pr.sh` that is helpful when we want to test an existing PR that has dependencies. Check the [Testing PR section in the hack documentation](hack/README.md#testing-prs) for additional information. 
+### Debugging
+
+When working on the cinder-operator there will be times where things won't work
+as expected and we'll need to debug things.
+
+In the [debugging article](docs/dev/debug.md) we present some ideas to help you
+figure things out.
+
 ### Pull Requests
 
 While the pull request flow is used for submitting new issues and for
diff --git a/docs/dev/debug.md b/docs/dev/debug.md
new file mode 100644
index 00000000..14bcb2ed
--- /dev/null
+++ b/docs/dev/debug.md
@@ -0,0 +1,437 @@
+# Debugging
+
+When we deploy OpenStack using operators there are many moving pieces that must
+work to get a running OpenStack deployment: OLM, OpenStack Operator, MariaDB
+operator, RabbitMQ operator, Keystone Operator, Cinder Operator, etc. For that
+reason it's good to know a bit about the different pieces and how they connect
+to each other.
+
+Besides reading this guide it is recommended to read the [Debug Running Pods
+documentation](
+https://kubernetes.io/docs/tasks/debug/debug-application/debug-running-pod).
+
+Usually the first step to resolve issues is to figure out **where** the issue
+is happening and we can do it starting from the OLM and go forward through the
+steps, do it in reverse starting from the cinder-operator and move backwards,
+or anything in between.
+
+### General flow
+
+To be able to locate where things are failing we first need to know what the
+expected steps are:
+
+- [Deploying operators](#deploying-operators)
+- [Propagating CRs](#propagating-crs)
+- [Waiting for services](#waiting-for-services)
+- [Deploying Cinder](#deploying-cinder)
+
+##### Deploying operators
+
+The expected result of running `make openstack` is to have the OpenStack
+operators running in our OpenShift cluster.
+
+The [Operator Lifecycle Manager (OLM)](https://olm.operatorframework.io/docs/)
+is used to deploy the operators, and it is recommended to read its
+documentation to understand it, but let's have a quick overview here.
+
+When we are packaging our operator to be delivered through the OLM we create 3
+container images, the operator itself, a bundle, and an index.
+
+The bundle image contains the [`ClusterServiceVersion`
+(CSV)](https://olm.operatorframework.io/docs/concepts/crds/clusterserviceversion/),
+which we can think of as something like an RPM. It contains metadata about a
+specific operator version as well as a template to be used by the OpenShift
+Deployment operator to create our operator pods.
+
+The index image holds a sqlite database with bundle definitions and it
+runs a grpc service when executed that lets consumers query the operators.
+
+The operator contains the service with the controllers for a number of CRDs.
+
+So how is the `make openstack` command actually deploying our operators using
+those images?
+
+The first thing it does is create an [`OperatorGroup`](https://docs.openshift.com/container-platform/4.11/operators/understanding/olm/olm-understanding-operatorgroups.html) to
+provide multitenant configuration selecting the target namespaces in which to
+generate required RBAC access for its member Operators.
+
+Then it creates a [`CatalogSource`](
+https://olm.operatorframework.io/docs/concepts/crds/catalogsource/) to
+represent a specific location that can be queried to discover and install
+operators and their dependencies. In our case this points to the index image,
+that runs a grpc service as mentioned before.
+
+This step is necessary because the operator we are installing is not present in
+the default catalog included in OpenShift.
+
+At this point OpenShift knows everything about our operators, but we still need
+to tell it that we want to install that specific operator. This is where the
+[`Subscription`](
+https://olm.operatorframework.io/docs/concepts/crds/subscription/) comes into
+play.
+ +A Subscription represents an intention to install an operator and the `make +openstack` command creates one for our operator and specifies our +`CatalogSource` as the source to find our operator. This means that it will +install our custom operator even if we have an official operator already +released in the official operator catalog. + +This newly created `Subscription` triggers the creation of the index pod so the +OLM can query the information, then an [`InstallPlan`]( +https://docs.openshift.com/container-platform/4.11/rest_api/operatorhub_apis/installplan-operators-coreos-com-v1alpha1.html) +gets created to take care of installing the resources for the operator. + +If the operators are not correctly deployed after running `make openstack`, +then we should look into the `InstallPlan` and check for errors. + +``` +oc describe InstallPlan | less +``` + +If there is no `InstallPlan` we have to check if the index pod is running: + +``` +oc get pod -l olm.catalogSource=openstack-operator-index +``` + +If it isn't, check for the `CatalogSource`: + +``` +oc describe catalogsource openstack-operator-index +``` + +##### Propagating CRs + +When we run `make openstack_deploy` we are basically applying our +`OpenStackControlPlane` manifest, as defined in the `OPENSTACK_CR` +environmental variable, which is a CRD defined by the openstack-operator. + +The openstack-operator has a controller watching for `OpenStackControlPlane` +resources, so when it sees a new one it starts working to reconcile it. In this +case that means propagating the `template` in the `cinder` section into a new +`Cinder` resource. + +So after the `OpenStackControlPlane` resource we should be able to see the +`Cinder` resource created by the openstack-operator, and this should contain +the same information present in the `template` section in the `cinder` section +of our manifest. 
+
+```
+oc get cinder
+```
+
+If we don't see this resource, then we need to check first that the `enabled`
+key inside the `cinder` section is not set to `false` and then look at the
+openstack-operator logs and search for reconciliation errors.
+
+```
+OPENSTACK_OPERATOR=`oc get pod -l control-plane=controller-manager -o custom-columns=POD:.metadata.name|grep openstack`
+oc logs $OPENSTACK_OPERATOR
+```
+
+Something similar should happen for the keystone and glance operators.
+
+```
+oc get keystoneapis
+oc get glances
+```
+
+##### Waiting for services
+
+Now that we have a `Cinder` resource it's the cinder-operator's turn, that has
+a specific controller waiting for these `Cinder` resources, but before it
+starts deploying the cinder services it has to make sure that everything is in
+place for the services to run correctly.
+
+The `Cinder` controller keeps the status of each of the steps it needs to
+perform as conditions, which can be checked with `oc describe cinder`.
+
+The steps are:
+
+- Request the RabbitMQ transport url information: A `TransportURL` resource is
+  created by the `Cinder` controller and will be handled by the
+  openstack-operator that waits until the RabbitMQ has been deployed and is
+  ready.
+
+  While RabbitMQ is not ready we can see that the `TransportURL` condition
+  called `TransportURLReady` has a `False` status.
+
+  Once RabbitMQ can accept requests the openstack-operator sets the
+  `TransportURLReady` condition to `True` and creates a secret and references
+  it in the `TransportURL` status field called `Secret Name`.
+
+  The condition in the `Cinder` resource for this step is called
+  `CinderRabbitMqTransportURLReady`.
+
+  If we never see the condition changing we should check the RabbitMQ pod (`oc
+  describe rabbitmq-server-0`) or its operator (`oc describe
+  controller-manager-8674c4db5c-lq56w`)
+
+- Check for required OpenStack secret: Condition `InputReady`.
+
+- Create `ConfigMap`s and `Secret`s: Condition `ServiceConfigReady`.
+ +- Request a database: Condition `DBReady`. The new database for cinder is + requested using the `mariadbdatabase` resource that is handled by the + mariadb-operator. + +- Run the database initialization code: Condition `DBSyncReady`. The db sync + is executed using an OpenShift [`Job`]( + https://docs.openshift.com/container-platform/4.11/nodes/jobs/nodes-nodes-jobs.html). + +##### Deploying Cinder + +Now that everything is ready the `Cinder` controller will request the creation +of the different cinder services: API, backup, scheduler, and volume. + +Each of the services has its own CRD (`cinderapi`, `cinderbackup`, +`cinderscheduler`, and `cindervolume`) and there is a specific controller in +the cinder-operator to reconcile each of these CRDs, just like there was one +for the top level `Cinder` CRD. + +If the cinder-operator is running successfully then it should generate those +resources based on the top level `Cinder` resource and the information gathered +by the `TransportURL`, the `mariadbdatabase`, and the generated `ConfigMap`s +and `Secret`s. + +If we don't see a specific resource kind it may be because its section in +`Cinder` didn't have the `replication` field set to 1 or greater, or because +there is a failure during it's creation. In this last case we should check the +cinder-operator's log and look for the error: + +``` +CINDER_OPERATOR=`oc get pod -l control-plane=controller-manager -o custom-columns=POD:.metadata.name|grep cinder` +oc logs $CINDER_OPERATOR +``` + +Each of these 4 controllers is responsible for a specific cinder service, and +the API will use a [`Deployment`]( +https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) but the +other 3 services will use a [`StatefulSet`]( +https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) +instead, because the first doesn't care about the hostname but the other 3 do, +since it is used by the Cinder code. 
+ +The operator deploys an independent pod for each cinder volume backend instead +of deploying them all in a single pod. + +At this point we should see the pods for each service running, or at least +trying to run. If they cannot run successfully we should `describe` the pod to +see what is failing. + +It's important to know that these 4 services use the [Kolla project]( +https://wiki.openstack.org/wiki/Kolla) to prepare and start the service. + +### OpenStack CRDs + +The specific CRDs for the whole OpenStack effort can be listed after +successfully running `make openstack` with: + +``` +oc get crd -l operators.coreos.com/openstack-operator.openstack= +``` + +### Configuration generation + +In the [waiting for services section](#waiting-for-services) we described the +creation of `ConfigMap`s and `Secret`s. Those are basically the scripts and +default configuration from the `templates` directory, but then we need to add +to that default configuration the customizations from the user, such as the +backend configuration. + +The final configuration is created by an [init container]( +https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) in each +service pod running `templates/cinder/bin/init.sh` and using templates and +environmental variables. + +The script generates files in `/var/lib/config-data/merged` before any other +container in the pod is started and then the directory is available in the +probe and service containers. + +There may be times where we want to debug what is happening in this +configuration generation, for the purpose we have the `initContainer` key in +the `debug` section of our manifests. 
+
+For example, if we wanted to debug this creation in the cinder-scheduler
+service we would edit the top level `OpenStackControlPlane`:
+
+```
+oc edit OpenStackControlPlane openstack
+```
+
+And then in the `template` section of the `cinder` section look for
+`cinderScheduler` and in the `debug` key change `initContainer: false` to
+`initContainer: true`, then save and exit.
+
+After saving the openstack-operator will notice the change and propagate this
+change to the `Cinder` resource, which in turn will get propagated by the
+cinder-operator into the `CinderScheduler` resource, which will terminate the
+existing pod and create a new one changing the command that is run.
+
+When debugging the init container the command that is run is no longer the
+`init.sh` script mentioned before, instead it executes a loop that sleeps for
+5 seconds while the file `/tmp/stop-init-container` exists.
+
+So once the debug mode has been applied we can see that the status of the
+container is `Init`:
+
+```
+$ oc get pod cinder-scheduler-0
+NAME                 READY   STATUS     RESTARTS   AGE
+cinder-scheduler-0   0/2     Init:0/1   0          3m2s
+```
+
+And we can go into the init container and manually run the script:
+
+```
+oc exec -it cinder-scheduler-0 -c init /bin/bash
+
+# See source files
+ls /etc/cinder/cinder.conf
+ls /var/lib/config-data/
+
+# Run script
+/usr/local/bin/container-scripts/init.sh
+
+# See merged result
+ls /var/lib/config-data/merged
+```
+
+Once we are satisfied with the resulting configuration files we can `rm
+/tmp/stop-init-container` and the container will be destroyed (we'll see a
+*command terminated with exit code 137* message) and the service and probe
+containers will be started.
+
+### Cinder services
+
+Even though we are not working on the cinder service there will be times when
+we need to debug it.
+
+Similarly to the [init container debugging](#configuration-generation) we also
+have a way to enable debugging in the service with the `service` key under
+`debug` in the specific service section we want to debug.
+
+Here the command for the service container is changed to an infinite sleep and
+the service container probes are disabled to prevent the container from getting
+constantly restarted.
+
+**NOTE**: Setting the service debug mode to `true` is the easiest way to debug
+a container that is in `CrashLoopBackOff` due to probe failures.
+
+Once the pod has been restarted and the service container is just sleeping we
+can go into the container.
+
+```
+oc exec -it cinder-scheduler-0 /bin/bash
+```
+
+As mentioned before we are using [Kolla](https://wiki.openstack.org/wiki/Kolla)
+to prepare and start the service, so it's a good idea to be familiar with [its
+API](https://docs.openstack.org/kolla/ussuri/admin/kolla_api.html).
+
+The first thing we need to do is to ask Kolla to move some files around for the
+service to be able to run. The files are defined in the kolla configuration
+json file that lives in `$KOLLA_CONFIG_FILE`. This is important because in
+this step the merged config file is moved to `/etc/cinder/cinder.conf`.
+
+To trigger the file moves we run:
+
+```
+/usr/local/bin/kolla_set_configs
+```
+
+Now we can check that everything looks right, and then start the service:
+
+```
+/usr/local/bin/kolla_start
+```
+
+If we see an unexpected behavior we can hit Ctrl+C to stop the service and
+explore the system, make changes to files or code (for example set a `pdb`
+trace), etc.
+
+As an alternative to setting the `service` key under `debug` we can also run:
+
+```
+oc debug cinder-scheduler-0 -c cinder-scheduler
+```
+
+And we'll get a pod in debug mode which is an exact copy of our running pod
+configuration but going into a shell.
+
+### Probes
+
+The API service's probes differ from those of the other services.
+
+There are 3 kinds of probes in OpenShift: Liveness, Readiness and Startup.
+Please [refer to the documentation for more information on them](
+https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes).
+
+The cinder API service has the liveness and readiness probes. The readiness
+probe checks that the API can respond to HTTP requests so it can be taken off
+the OpenShift service load balancer if it can't. The liveness probe checks the
+`/healthcheck` endpoint.
+
+The other cinder services -scheduler, volume, and backup- don't have the
+readiness probe because they don't leverage the OpenShift Service and its load
+balancer, as they don't communicate through the REST API but through RabbitMQ
+RPCs instead. So these 3 services only have Startup and Liveness probes.
+
+The probes of these 3 internal services do HTTP requests to a very simple HTTP
+server that runs in a container within the service pod. The probe server code
+is at `templates/cinder/bin/healthcheck.py` and its port is not exposed outside
+the pod because OpenShift probe checking is done from the same network
+namespace as the pod.
+
+The probe server is quite simple right now and all it does is go to the Cinder
+database and check the status of the service. If the service doesn't appear
+as up, then it will return a 500 status error code.
+
+In the future the probe may be extended to send RabbitMQ requests (for example
+a get logs call) to check for connectivity.
+
+If we want to debug the probe we should set the service in debug mode and start
+the service as described in the [Cinder services section](#cinder-services),
+then go into a different terminal, enter the probe container and run the
+script, probably with `pdb`; at this point we can make the query from
+another terminal.
+
+For example, after the cinder-scheduler is manually started we run in terminal
+number 2:
+
+```
+oc exec -it cinder-scheduler-0 -c probe /bin/bash
+python3 -m pdb /usr/local/bin/container-scripts/healthcheck.py scheduler /var/lib/config-data/merged/cinder.conf
+
+# now we run the script, set break points, etc
+```
+
+Once the probe server is listening to requests we can go to our terminal 3 and:
+
+```
+oc exec -it cinder-scheduler-0 -c probe /bin/bash
+curl localhost:8080
+```
+
+### Operator
+
+The easiest way to debug the operator is running it locally and modify the
+command used to run the operator to include the debugger. For example if we
+are using [Delve](https://github.com/go-delve/delve) we can do:
+
+```sh
+make build
+OPERATOR_TEMPLATES=$PWD/templates dlv exec ./bin/manager
+```
+
+Debugging using your own IDE [locally](
+https://golangforall.com/en/post/goland-debugger.html) or [remotely when using
+a VM](https://golangforall.com/en/post/go-docker-delve-remote-debug.html)
+requires some extra steps but is also possible.
+
+If we want to debug the container inside OpenShift we could build a custom
+container image that has the dlv command, set the service to `debug`, and then
+run the debugger:
+
+```
+oc exec -it <pod-name> -- /dlv exec /manager
+```

From 6b414d42b29ef587d4ae39d9d1ab2248e153e2cd Mon Sep 17 00:00:00 2001
From: Gorka Eguileor
Date: Wed, 30 Nov 2022 13:52:38 +0100
Subject: [PATCH 7/7] Docs & Hack: extraVol changes

This patch makes changes to the getting started scripts and manifest to
adapt to the new extraVol changes.
--- .gitignore | 4 ++ hack/dev/ceph/I_AM_A_DEMO | 0 hack/dev/ceph/ceph.client.admin.keyring | 6 --- hack/dev/ceph/ceph.conf | 16 -------- hack/dev/ceph/ceph.mon.keyring | 9 ----- hack/dev/create-ceph.sh | 38 ++++++++++++++++-- hack/dev/openstack-ceph.yaml | 52 ++++++++++++++++--------- hack/dev/openstack-lvm-ceph.yaml | 52 ++++++++++++++++--------- 8 files changed, 106 insertions(+), 71 deletions(-) delete mode 100644 hack/dev/ceph/I_AM_A_DEMO delete mode 100644 hack/dev/ceph/ceph.client.admin.keyring delete mode 100644 hack/dev/ceph/ceph.conf delete mode 100644 hack/dev/ceph/ceph.mon.keyring diff --git a/.gitignore b/.gitignore index 15cee214..52cf16b0 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,7 @@ CI_TOOLS_REPO # generated workspace file go.work go.work.sum + +# Hack files that are modified by scripts +hack/dev/openstack-ceph.yaml +hack/dev/openstack-lvm-ceph.yaml diff --git a/hack/dev/ceph/I_AM_A_DEMO b/hack/dev/ceph/I_AM_A_DEMO deleted file mode 100644 index e69de29b..00000000 diff --git a/hack/dev/ceph/ceph.client.admin.keyring b/hack/dev/ceph/ceph.client.admin.keyring deleted file mode 100644 index a07f2077..00000000 --- a/hack/dev/ceph/ceph.client.admin.keyring +++ /dev/null @@ -1,6 +0,0 @@ -[client.admin] - key = AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== - caps mds = "allow *" - caps mgr = "allow *" - caps mon = "allow *" - caps osd = "allow *" diff --git a/hack/dev/ceph/ceph.conf b/hack/dev/ceph/ceph.conf deleted file mode 100644 index 6662ff1f..00000000 --- a/hack/dev/ceph/ceph.conf +++ /dev/null @@ -1,16 +0,0 @@ -[global] -fsid = 5fe62cc7-0392-4a32-8466-081ce0ea970f -mon initial members = localhost -mon host = v2:192.168.130.1:3300/0 -osd crush chooseleaf type = 0 -osd journal size = 100 -public network = 0.0.0.0/0 -cluster network = 0.0.0.0/0 -osd pool default size = 1 -mon warn on pool no redundancy = false -auth allow insecure global id reclaim = false -osd objectstore = bluestore - -[osd.0] -osd data = /var/lib/ceph/osd/ceph-0 - diff --git 
a/hack/dev/ceph/ceph.mon.keyring b/hack/dev/ceph/ceph.mon.keyring deleted file mode 100644 index 2724a18d..00000000 --- a/hack/dev/ceph/ceph.mon.keyring +++ /dev/null @@ -1,9 +0,0 @@ -[mon.] - key = AQBCtBhjSAiDFhAAiNDfWsKMES1krJAye5sk0Q== - caps mon = "allow *" -[client.admin] - key = AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== - caps mds = "allow *" - caps mgr = "allow *" - caps mon = "allow *" - caps osd = "allow *" diff --git a/hack/dev/create-ceph.sh b/hack/dev/create-ceph.sh index f1871d9f..e4702917 100755 --- a/hack/dev/create-ceph.sh +++ b/hack/dev/create-ceph.sh @@ -1,14 +1,44 @@ #!/bin/env bash +set -e LOCATION=$(realpath `dirname -- $BASH_SOURCE[0]`) -sudo cp -R "${LOCATION}/ceph" /etc +SECRET_NAME=${1:-ceph-client-files} +OPENSTACK_YAMLS="$LOCATION/openstack-ceph.yaml $LOCATION/openstack-lvm-ceph.yaml" # Change Ceph default features (if we want to attach using krbd) # echo -e "\nrbd default features = 3" | sudo tee -a /etc/ceph/ceph.conf +if sudo podman container exists ceph; then + echo 'Ceph container exists reusing it' +else + echo 'Starting ceph Pacific demo cluster' + sudo podman run -d --name ceph --net=host -e MON_IP=192.168.130.1 -e CEPH_PUBLIC_NETWORK=0.0.0.0/0 -e DEMO_DAEMONS='osd' quay.io/ceph/daemon:latest-pacific demo +fi -echo 'Running ceph Pacific demo cluster' -sudo podman run -d --name ceph --net=host -v /etc/ceph:/etc/ceph:z -v /lib/modules:/lib/modules -e MON_IP=192.168.130.1 -e CEPH_PUBLIC_NETWORK=0.0.0.0/0 -e DEMO_DAEMONS='osd' quay.io/ceph/daemon:latest-pacific demo +echo 'Waiting for Ceph config files to be created' +until sudo podman exec -t ceph test -e /etc/ceph/I_AM_A_DEMO +do + echo -n . 
+ sleep 0.5 +done +echo -sleep 3 +TEMPDIR=`mktemp -d` +trap 'sudo rm -rf -- "$TEMPDIR"' EXIT +echo 'Copying Ceph config files from the container to $TEMPDIR' +sudo podman cp ceph:/etc/ceph/ceph.conf $TEMPDIR +sudo podman cp ceph:/etc/ceph/ceph.client.admin.keyring $TEMPDIR +sudo chown `whoami` $TEMPDIR/* + +echo "Replacing openshift secret $SECRET_NAME" +oc delete secret $SECRET_NAME 2>/dev/null || true +oc create secret generic $SECRET_NAME --from-file=$TEMPDIR/ceph.conf --from-file=$TEMPDIR/ceph.client.admin.keyring + +FSID=`grep fsid $TEMPDIR/ceph.conf | cut -d' ' -f 3` +for manifest in $OPENSTACK_YAMLS +do + # echo "Replacing Ceph FSID in $OPENSTACK_YAML with $FSID" + echo "Replacing Ceph FSID in $manifest with $FSID" + sed -i "s/rbd_secret_uuid\\s*=.*/rbd_secret_uuid=$FSID/g" "$manifest" +done sudo podman exec -it ceph bash -c 'ceph osd pool create volumes 4 && ceph osd pool application enable volumes rbd' sudo podman exec -it ceph bash -c 'ceph osd pool create backups 4 && ceph osd pool application enable backups rbd' diff --git a/hack/dev/openstack-ceph.yaml b/hack/dev/openstack-ceph.yaml index 224d400d..095be683 100644 --- a/hack/dev/openstack-ceph.yaml +++ b/hack/dev/openstack-ceph.yaml @@ -41,6 +41,13 @@ spec: customServiceConfig: | [DEFAULT] debug = true + enabled_backends=default_backend:rbd + + [default_backend] + rbd_store_ceph_conf=/etc/ceph/ceph.conf + rbd_store_user=admin + rbd_store_pool=images + store_description=Ceph glance store backend. 
glanceAPIInternal: debug: service: false @@ -51,14 +58,6 @@ spec: service: false preserveJobs: false replicas: 1 - cephBackend: - cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f - cephMons: 192.168.130.1 - cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== - cephUser: admin - cephPools: - glance: - name: images cinder: enabled: true template: @@ -99,16 +98,33 @@ spec: customServiceConfig: | [DEFAULT] enabled_backends=ceph + [ceph] + volume_backend_name=ceph + volume_driver=cinder.volume.drivers.rbd.RBDDriver + rbd_ceph_conf=/etc/ceph/ceph.conf + rbd_user=admin + rbd_pool=volumes + rbd_flatten_volume_from_snapshot=False + rbd_secret_uuid=5fe62cc7-0392-4a32-8466-081ce0ea970f debug: initContainer: false service: false - cephBackend: - cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f - cephMons: 192.168.130.1 - cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== - cephUser: admin - cephPools: - cinder: - name: volumes - cinder_backup: - name: backup + extraMounts: + - name: cephfiles + region: r1 + extraVol: + - propagation: + - Glance + - CinderVolume + - CinderBackup + extraVolType: Ceph + volumes: + - name: ceph + projected: + sources: + - secret: + name: ceph-client-files + mounts: + - name: ceph + mountPath: "/etc/ceph" + readOnly: true diff --git a/hack/dev/openstack-lvm-ceph.yaml b/hack/dev/openstack-lvm-ceph.yaml index f4c20a77..f448ac9d 100644 --- a/hack/dev/openstack-lvm-ceph.yaml +++ b/hack/dev/openstack-lvm-ceph.yaml @@ -41,6 +41,13 @@ spec: customServiceConfig: | [DEFAULT] debug = true + enabled_backends=default_backend:rbd + + [default_backend] + rbd_store_ceph_conf=/etc/ceph/ceph.conf + rbd_store_user=admin + rbd_store_pool=images + store_description=Ceph glance store backend. 
glanceAPIInternal: debug: service: false @@ -51,14 +58,6 @@ spec: service: false preserveJobs: false replicas: 1 - cephBackend: - cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f - cephMons: 192.168.130.1 - cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== - cephUser: admin - cephPools: - glance: - name: images cinder: enabled: true template: @@ -116,16 +115,33 @@ spec: customServiceConfig: | [DEFAULT] enabled_backends=ceph + [ceph] + volume_backend_name=ceph + volume_driver=cinder.volume.drivers.rbd.RBDDriver + rbd_ceph_conf=/etc/ceph/ceph.conf + rbd_user=admin + rbd_pool=volumes + rbd_flatten_volume_from_snapshot=False + rbd_secret_uuid=5fe62cc7-0392-4a32-8466-081ce0ea970f debug: initContainer: false service: false - cephBackend: - cephFsid: 5fe62cc7-0392-4a32-8466-081ce0ea970f - cephMons: 192.168.130.1 - cephClientKey: AQBCtBhj0gM6FRAACq4EGHK6qYqRBSbw4zFavg== - cephUser: admin - cephPools: - cinder: - name: volumes - cinder_backup: - name: backup + extraMounts: + - name: cephfiles + region: r1 + extraVol: + - propagation: + - Glance + - CinderVolume + - CinderBackup + extraVolType: Ceph + volumes: + - name: ceph + projected: + sources: + - secret: + name: ceph-client-files + mounts: + - name: ceph + mountPath: "/etc/ceph" + readOnly: true