From ba7d3367f85f20242d5a43b340d5530d0acd4760 Mon Sep 17 00:00:00 2001 From: Bohdan Dobrelia Date: Wed, 11 Dec 2024 18:07:37 +0100 Subject: [PATCH] Multi-cell adoption for remaining services Split edpm nodes into compute cells by 1:1 mapping it as dataplane nodesets. Use edpm_nodes var to describe computes for each cell, instead of static host and ip vars that only used to work for a single-cell standalone, or multi-node single cell cases. Also explain EDPM net config requirements in vars.sample, when it is used outside of ci-framework (local deployments). Remove edpm_computes vars no longer used after moving stopping control-plane tripleo services into edpm-ansible Simplify ENV headers management by collecting in a single place. Provide a variable to define the source cloud Ironic topology, for any cells with Ironic services. Align nova/libvirt and related services ordering in the lists of services defined in multiple places, with those specified in VA. Align the names in the tests to follow the documented steps to make the corresponding code easy discoverable. Adjust storage/storageRequests values to make it better fitting a multi-cell test scenarios. Also provide values in docs and add a comment to adjust them as needed. 
Stop ovn services only if active, or not missing (like on the cell controllers) Signed-off-by: Bohdan Dobrelia --- .../assemblies/development_environment.adoc | 139 ++++++- docs_dev/assemblies/tests.adoc | 2 + ...ng-compute-services-to-the-data-plane.adoc | 378 +++++++++++++----- ...-networker-services-to-the-data-plane.adoc | 1 - .../proc_adopting-the-compute-service.adoc | 114 ++++-- .../proc_configuring-data-plane-nodes.adoc | 3 +- .../proc_deploying-backend-services.adoc | 37 +- ...rating-databases-to-mariadb-instances.adoc | 6 +- ...t-forward-upgrade-on-compute-services.adoc | 135 +++++-- ...pology-specific-service-configuration.adoc | 2 +- .../templates/openstack_control_plane.j2 | 32 ++ .../roles/common_defaults/defaults/main.yaml | 50 +++ .../control_plane_rollback/defaults/main.yaml | 1 + .../dataplane_adoption/defaults/main.yaml | 13 +- .../roles/dataplane_adoption/tasks/main.yaml | 287 +++++++++---- .../dataplane_adoption/tasks/nova_ffu.yaml | 102 +++-- tests/roles/dataplane_adoption/vars/rhev.yaml | 1 + tests/roles/nova_adoption/defaults/main.yaml | 93 +++-- .../nova_adoption/tasks/nova_ironic.yaml | 9 +- .../nova_adoption/tasks/nova_libvirt.yaml | 25 +- tests/roles/nova_adoption/tasks/wait.yaml | 2 +- tests/roles/ovn_adoption/tasks/main.yaml | 18 +- tests/vars.sample.yaml | 61 ++- 23 files changed, 1135 insertions(+), 376 deletions(-) diff --git a/docs_dev/assemblies/development_environment.adoc b/docs_dev/assemblies/development_environment.adoc index 2663a0c30..faa7e4ff2 100644 --- a/docs_dev/assemblies/development_environment.adoc +++ b/docs_dev/assemblies/development_environment.adoc @@ -169,6 +169,104 @@ https://openstack-k8s-operators.github.io/data-plane-adoption/dev/#_reset_the_en ''' +== Deploying TripleO With Multiple Cells + +A TripleO Standalone setup creates only a single Nova v2 cell, with a combined controller and compute services on it. 
+In order to deploy multiple compute cells for adoption testing (without Ceph), create 5 VMs, with the following requirements met: + +* Named `edpm-compute-0` .. `edpm-compute-4`. +* Running RHEL 9.2, with RHOSP 17.1 repositories configured. +* Can login via SSH without a password as the root user, from the hypervisor host. +* User `zuul` is created, and can sudo without a password, and login via SSH without a password, from the hypervisor host. +* User `zuul` can login to `edpm-compute-1`, `edpm-compute-2`, `edpm-compute-3`, `edpm-compute-4` nodes via SSH without a password, from the `edpm-compute-0` node, +by using the generated `/home/zuul/.ssh/id_rsa` private key. +* RedHat registry credentials are exported on the hypervisor host. + +Adjust the following commands for a repositories configuration tool of your choice: + +[,bash] +---- +export RH_REGISTRY_USER="" +export RH_REGISTRY_PWD="" + +DEFAULT_CELL_NAME="cell3" <1> +RENAMED_CELLS="cell1 cell2 $DEFAULT_CELL_NAME" + +cd ~/install_yamls/devsetup +cat <<EOF > /tmp/osp17_repos +# Use a tool of your choice: +# 1. Rhos-release example steps are only available from the internal RedHat network +# ... skipping download and install steps ... +# sudo rhos-release -x +# sudo rhos-release 17.1 + +# 2. 
Subscription-manager example steps require an active registration +# subscription-manager release --set=9.2 +# subscription-manager repos --disable=* +# sudo subscription-manager repos \ +# --enable=rhel-9-for-x86_64-baseos-eus-rpms \ +# --enable=rhel-9-for-x86_64-appstream-eus-rpms \ +# --enable=rhel-9-for-x86_64-highavailability-eus-rpms \ +# --enable=openstack-17.1-for-rhel-9-x86_64-rpms \ +# --enable=rhceph-6-tools-for-rhel-9-x86_64-rpms \ +# --enable=fast-datapath-for-rhel-9-x86_64-rpms + +# firstboot commands +sudo dnf install -y git curl wget podman python3-tripleoclient openvswitch3.1 NetworkManager-initscripts-updown +sudo dnf install -y util-linux cephadm driverctl lvm2 jq nftables iptables-nft openstack-heat-agents \ + os-net-config python3-libselinux python3-pyyaml rsync tmpwatch sysstat iproute-tc +sudo dnf install -y puppet-tripleo puppet-headless +sudo dnf install -y openstack-selinux +EOF + +export CENTOS_9_STREAM_URL= +export NTP_SERVER= + +export MANILA_ENABLED=false +export EDPM_COMPUTE_CEPH_ENABLED=false +export EDPM_COMPUTE_CEPH_NOVA=false +export EDPM_COMPUTE_CELLS=3 + +export STANDALONE_EXTRA_CMD="bash -c 'echo \"$RH_REGISTRY_PWD\" > ~/authfile; chmod 0600 ~/authfile; sudo dnf install -y podman; sudo /bin/podman login registry.redhat.io -u \"$RH_REGISTRY_USER\" --password-stdin < ~/authfile'" +export EDPM_FIRSTBOOT_EXTRA=/tmp/osp17_repos +export EDPM_TOTAL_NODES=1 +export SKIP_TRIPLEO_REPOS=false +export EDPM_COMPUTE_NETWORK_IP=192.168.122.1 +export HOST_PRIMARY_RESOLV_CONF_ENTRY=192.168.122.1 +export BASE_DISK_FILENAME="rhel-9-base.qcow2" + +EDPM_COMPUTE_SUFFIX=0 IP=192.168.122.100 EDPM_COMPUTE_DISK_SIZE=10 EDPM_COMPUTE_RAM=9 EDPM_COMPUTE_VCPUS=2 make edpm_compute +EDPM_COMPUTE_SUFFIX=1 IP=192.168.122.103 EDPM_COMPUTE_DISK_SIZE=17 EDPM_COMPUTE_RAM=12 EDPM_COMPUTE_VCPUS=4 make edpm_compute +EDPM_COMPUTE_SUFFIX=2 IP=192.168.122.106 EDPM_COMPUTE_DISK_SIZE=14 EDPM_COMPUTE_RAM=12 EDPM_COMPUTE_VCPUS=4 make edpm_compute +EDPM_COMPUTE_SUFFIX=3 
IP=192.168.122.107 EDPM_COMPUTE_DISK_SIZE=12 EDPM_COMPUTE_RAM=4 EDPM_COMPUTE_VCPUS=2 make edpm_compute +EDPM_COMPUTE_SUFFIX=4 IP=192.168.122.109 EDPM_COMPUTE_DISK_SIZE=16 EDPM_COMPUTE_RAM=12 EDPM_COMPUTE_VCPUS=4 make edpm_compute + +for n in 0 3 6 7 9; do + # w/a bad packages installation, if done by firstboot - resulting in rpm -V check failures in tripleo-ansible + ssh -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + root@192.168.122.10${n} dnf install -y openstack-selinux ';' \ + dnf reinstall -y openstack-selinux + ssh -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + root@192.168.122.10${n} useradd --create-home --shell /bin/bash --groups root zuul ';' \ + mkdir -p /home/zuul/.ssh + scp -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa root@192.168.122.10${n}:/home/zuul/.ssh/id_rsa + ssh -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + root@192.168.122.10${n} ssh-keygen -yf /home/zuul/.ssh/id_rsa '>' /home/zuul/.ssh/id_rsa.pub + ssh -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + root@192.168.122.10${n} cp /root/.ssh/authorized_keys /home/zuul/.ssh/authorized_keys + ssh -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + root@192.168.122.10${n} chown zuul: /home/zuul/.ssh/* + ssh -o StrictHostKeyChecking=false -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa \ + root@192.168.122.10${n} echo "zuul ALL=NOPASSWD:ALL" '>' /etc/sudoers.d/zuul +done + +make tripleo_deploy + +for n in 0 1 2 3 4; do make standalone_snapshot EDPM_COMPUTE_SUFFIX=$n; done +---- +<1> The source cloud default cell takes a new `$DEFAULT_CELL_NAME`. In a multi-cell adoption scenario, it may either retain its original name `default`, or become created as a last `cell`. 
+ == Network routing Route VLAN20 to have access to the MariaDB cluster: @@ -201,8 +299,10 @@ installing the package and copying the configuration file from the virtual machi [,bash] ---- -alias openstack="ssh -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa root@192.168.122.100 OS_CLOUD=standalone openstack" +OS_CLOUD_NAME=standalone +alias openstack="ssh -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa root@192.168.122.100 OS_CLOUD=$OS_CLOUD_NAME openstack" ---- +For a multi-cell environment, set `OS_CLOUD_NAME` to `overcloud`. === Virtual machine steps @@ -327,15 +427,28 @@ make openstack == Performing the adoption procedure -To simplify the adoption procedure, copy the deployment passwords that +To simplify the adoption procedure with additional cells, copy and rename the deployment passwords that you use in copy the deployment passwords that you use in the https://openstack-k8s-operators.github.io/data-plane-adoption/user/#deploying-backend-services_migrating-databases[backend services deployment phase of the data plane adoption]. +For a single-cell standalone TripleO deployment: +[,bash] +---- +scp -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa root@192.168.122.100:/root/tripleo-standalone-passwords.yaml ~/overcloud-passwords.yaml +---- + +Further on, this password is going to be referenced as `TRIPLEO_PASSWORDS[default]` for a `default` cell name, in terms of TripleO. 
+ +For a source cloud deployment with multiple stacks, change the above command to these: [,bash] ---- -scp -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa root@192.168.122.100:/root/tripleo-standalone-passwords.yaml ~/ +scp -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa zuul@192.168.122.100:overcloud-deploy/overcloud/overcloud-passwords.yaml ~/ +scp -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa zuul@192.168.122.100:overcloud-deploy/cell1/cell1-passwords.yaml ~/ +scp -i ~/install_yamls/out/edpm/ansibleee-ssh-key-id_rsa zuul@192.168.122.100:overcloud-deploy/cell2/cell2-passwords.yaml ~/ ---- +Note that all compute cells of the source cloud always share the same database and messaging passwords. +On the contrary, a generic split-stack topology allows using different passwords files for its stacks. The development environment is now set up, you can go to the https://openstack-k8s-operators.github.io/data-plane-adoption/[Adoption documentation] @@ -353,8 +466,10 @@ Delete the data-plane and control-plane resources from the CRC vm [,bash] ---- -oc delete --ignore-not-found=true --wait=false openstackdataplanedeployment/openstack -oc delete --ignore-not-found=true --wait=false openstackdataplanedeployment/openstack-nova-compute-ffu +for CELL in $(echo $RENAMED_CELLS); do + oc delete --ignore-not-found=true --wait=false openstackdataplanedeployment/openstack-$CELL + oc delete --ignore-not-found=true --wait=false openstackdataplanedeployment/openstack-nova-compute-ffu-$CELL +done oc delete --ignore-not-found=true --wait=false openstackcontrolplane/openstack oc patch openstackcontrolplane openstack --type=merge --patch ' metadata: @@ -373,7 +488,7 @@ oc delete --wait=false pod mariadb-copy-data || true oc delete secret osp-secret || true ---- -Revert the standalone vm to the snapshotted state +Revert the standalone vm(s) to the snapshotted state [,bash] ---- @@ -381,13 +496,25 @@ cd ~/install_yamls/devsetup make standalone_revert ---- +For a multi-cell 
deployment, change the above command to these: +[,bash] +---- +cd ~/install_yamls/devsetup +for n in 0 1 2 3 4; do make standalone_revert EDPM_COMPUTE_SUFFIX=$n; done +---- + Clean up and initialize the storage PVs in CRC vm [,bash] ---- cd .. for i in {1..3}; do make crc_storage_cleanup crc_storage && break || sleep 5; done +for CELL in $(echo $RENAMED_CELLS); do + oc delete pvc mysql-db-openstack-$CELL-galera-0 --ignore-not-found=true + oc delete pvc persistence-rabbitmq-$CELL-server-0 --ignore-not-found=true +done ---- +Use indexes like `*-0`, `*-1` based on the replica counts configured in `oscp/openstack` CR. ''' diff --git a/docs_dev/assemblies/tests.adoc b/docs_dev/assemblies/tests.adoc index 4203a436d..8945195ad 100644 --- a/docs_dev/assemblies/tests.adoc +++ b/docs_dev/assemblies/tests.adoc @@ -29,7 +29,9 @@ these variables suit your environment: ** `tripleo_passwords` (for each {OpenStackPreviousInstaller} Heat stack on the source cloud) ** `source_galera_members` (for each cell controller on the source cloud) ** `source_mariadb_ip` (for each cell controller on the source cloud) + ** `edpm_nodes` (for each cell compute node on the destination) ** `edpm_privatekey_path` + ** `source_ovndb_ip` ** `timesync_ntp_servers` == Running the tests diff --git a/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc b/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc index 1fc2a7236..245c485c3 100644 --- a/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc +++ b/docs_user/modules/proc_adopting-compute-services-to-the-data-plane.adoc @@ -81,21 +81,56 @@ EOF + * If `neutron-sriov-nic-agent` is running on your {compute_service} nodes, ensure that the physical device mappings match the values that are defined in the `OpenStackDataPlaneNodeSet` custom resource (CR). 
For more information, see xref:pulling-configuration-from-tripleo-deployment_adopt-control-plane[Pulling the configuration from a {OpenStackPreviousInstaller} deployment]. -* You have defined the shell variables to run the script that runs the fast-forward upgrade: +* You have defined the shell variables to run the script that runs the upgrade: + ---- -PODIFIED_DB_ROOT_PASSWORD=$(oc get -o json secret/osp-secret | jq -r .data.DbRootPassword | base64 -d) -CEPH_FSID=$(oc get secret ceph-conf-files -o json | jq -r '.data."ceph.conf"' | base64 -d | grep fsid | sed -e 's/fsid = //' +$ CEPH_FSID=$(oc get secret ceph-conf-files -o json | jq -r '.data."ceph.conf"' | base64 -d | grep fsid | sed -e 's/fsid = //' -alias openstack="oc exec -t openstackclient -- openstack" -declare -A computes -export computes=( - ["standalone.localdomain"]="192.168.122.100" +$ alias openstack="oc exec -t openstackclient -- openstack" + +$ DEFAULT_CELL_NAME="cell3" <1> +$ RENAMED_CELLS="cell1 cell2 $DEFAULT_CELL_NAME" + +$ declare -A COMPUTES_CELL1 +$ export COMPUTES_CELL1=( <2> + ["standalone.localdomain"]="192.168.122.100" <3> + # ... <4> +) +$ declare -A COMPUTES_CELL2 +$ export COMPUTES_CELL2=( # ... ) +$ declare -A COMPUTES_CELL3 +$ export COMPUTES_CELL3=( + # ... <5> +) +# ... + +$ NODESETS="" +$ for CELL in $(echo $RENAMED_CELLS); do + ref="COMPUTES_$(echo ${CELL}|tr '[:lower:]' '[:upper:]')" + eval names=\${!${ref}[@]} + [ -z "$names" ] && continue <6> + NODESETS="'openstack-${CELL}', $NODESETS" +done +$ NODESETS="[${NODESETS%,*}]" ---- + -** Replace `["standalone.localdomain"]="192.168.122.100"` with the name and IP address of the {compute_service} node. +<1> The source cloud `default` cell takes a new `DEFAULT_CELL_NAME` on the destined cloud after adoption. +In a multi-cell adoption scenario, you may either retain its original name `default`, or create as `cell`, by providing the incremented index of the last cell in the source cloud (which is, by adding a 1 to it). 
+<2> For each cell, adjust <["standalone.localdomain"]="192.168.122.100">, and complete `COMPUTES_CELL_` data with the names and IP addresses of the {compute_service} nodes. +<3> If your deployment has a custom DNS Domain, put it in for FQDN of the nodes. The given values will be used in the dataplane node sets' `spec.nodes..hostName`. +<4> Assign all {compute_service} nodes from the source cloud `cell1` cell into `COMPUTES_CELL1`, and so on. +<5> Assign all {compute_service} nodes from the source cloud `default` cell into `openstack-`, +where `` is the `DEFAULT_CELL_NAME` environment variable value (here, it equals 'cell3'). +<6> Cells not containing compute nodes will be omitted as no node sets for it should be created. + +** A standalone TripleO only creates a default cell, so you should define that instead: ++ +---- +$ DEFAULT_CELL_NAME="cell1" +$ RENAMED_CELLS="cell1" +---- + [NOTE] Do not set a value for the `CEPH_FSID` parameter if the local storage back end is configured by the {compute_service} for libvirt. The storage back end must match the source cloud storage back end. You cannot change the storage back end during adoption. @@ -150,7 +185,7 @@ rm -f id* cd - ---- -. If you use a local storage back end for libvirt, create a `nova-compute-extra-config` service to remove pre-fast-forward workarounds and configure Compute services to use a local storage back end: +. Create a configuration map which should become common for all cells. To configure a local storage back end for libvirt: + [source,yaml] ---- @@ -158,35 +193,24 @@ $ oc apply -f - < + 99-nova-compute-cells-workarounds.conf: | <2> [workarounds] disable_compute_service_check_for_ffu=true EOF ---- + -[NOTE] -The secret `nova-cell-compute-config` auto-generates for each -`cell`. You must specify values for the `nova-cell-compute-config` and `nova-migration-ssh-key` parameters for each custom `OpenStackDataPlaneService` CR that is related to the {compute_service}. 
+<1> The `data` resources in the `ConfigMap` provide cell-specific configuration files. +<2> There is a requirement to index the <*.conf> files from '03' to '99', based on their precedence. +A <99-*.conf> file takes top precedence. Indexes below '03' are reserved for internal use. -. If TLS Everywhere is enabled, append the following content to the `OpenStackDataPlaneService` CR: -+ -[source,yaml] ----- - tlsCerts: - contents: - - dnsnames - - ips - networks: - - ctlplane - issuer: osp-rootca-issuer-internal - caCerts: combined-ca-bundle - edpmServiceType: nova ----- +[NOTE] +You should never delete, nor overwrite, the cell1's default `nova-extra-config` configuration map assigned to its default dataplane service 'nova'. +Adopting a live cloud might require other configurations to carry over for Nova EDPM services stored in that configuration map, without overwriting or losing them. -. If you use a Ceph back end for libvirt, create a `nova-compute-extra-config` service to remove pre-fast-forward upgrade workarounds and configure Compute services to use a Ceph back end: +. 
To configure a Ceph back end for libvirt: + [source,yaml] ---- @@ -194,10 +218,10 @@ $ oc apply -f - < + - secretRef: + name: nova-$CELL-compute-config <2> + - secretRef: + name: nova-migration-ssh-key <3> + - configMapRef: + name: nova-cells-global-config + optional: true + playbook: osp.edpm.nova + caCerts: combined-ca-bundle + edpmServiceType: nova + containerImageFields: + - NovaComputeImage + - EdpmIscsidImage +EOF + done +---- ++ + +* If TLS Everywhere is enabled, append the following content to the `OpenStackDataPlaneService` CR: ++ +[source,yaml] +---- + tlsCerts: + contents: + - dnsnames + - ips + networks: + - ctlplane + issuer: osp-rootca-issuer-internal + caCerts: combined-ca-bundle + edpmServiceType: nova +---- ++ +<1> To enable a local metadata services for a cell, append a `spec.dataSources.secretRef` to reference +an additional auto-generated `nova-cell-metadata-neutron-config` secret. You should have also set +`spec.nova.template.cellTemplates.cell.metadataServiceTemplate.enable` in the `OpenStackControlPlane/openstack` CR. +<2> The secret `nova-cell-compute-config` auto-generates for each `cell`. +<3> You must append the `nova-cell-compute-config` and `nova-migration-ssh-key` references for each custom `OpenStackDataPlaneService` CR that is related to the {compute_service}. + +* For simple configuration overrides, we do not need a custom dataplane service. However, to reconfigure the cell `cell1` in general, +the safest option would be always creating a custom service, and a dedicated configuration map for it. + +[NOTE] +The cell `cell1` is already managed with the default `OpenStackDataPlaneService` called `nova` +and its `nova-extra-config` configuration map. Do not change the default dataplane service 'nova' definition. +The changes will be lost, when the {rhos_long} operator becomes updated with OLM. 
+ +* When a cell spans multiple node sets, you might want to name the custom `OpenStackDataPlaneService` resources like +`nova-cell1-nfv` and `nova-cell1-enterprise`. Then the auto-generated configmaps would be named +`nova-cell1-nfv-extra-config` and `nova-cell1-enterprise-extra-config`. + +[NOTE] +Different configurations for nodes in multiple node sets of the same cell are not covered in this guide. ifeval::["{build}" == "downstream"] . Create a secret for the subscription manager: @@ -236,18 +328,60 @@ $ oc create secret generic redhat-registry \ * Replace `` with the applicable user name. * Replace `` with the applicable password. endif::[] ++ + +[NOTE] +The `subscription-manager` secret does not need to be referenced in `OpenStackDataPlaneService`'s `spec.dataSources` data. +It is already passed in via a node-specific `OpenStackDataPlaneNodeSet` data in `spec.nodeTemplate.ansible.ansibleVarsFrom`. + + +. Create the dataplane node sets definitions for each cell: -. Deploy the `OpenStackDataPlaneNodeSet` CR: + [source,yaml] ---- -$ oc apply -f - <> computes-$CELL << EOF + ${compute}: + hostName: $compute + ansible: + ansibleHost: $compute + networks: <1> + - defaultRoute: true + fixedIP: ${!ip} + name: ctlplane + subnetName: subnet1 + - name: internalapi + subnetName: subnet1 + - name: storage + subnetName: subnet1 + - name: tenant + subnetName: subnet1 +EOF + ind=$(( ind + 1 )) + done + + test -f computes-$CELL || continue + if [ "$CELL" = "cell1" ]; then + GLOBAL="- ssh-known-hosts" + else + GLOBAL=" " + fi + cat > nodeset-${CELL}.yaml < spec: - tlsEnabled: false <1> + tlsEnabled: false <3> networkAttachments: - ctlplane preProvisioned: true @@ -261,36 +395,22 @@ endif::[] - validate-network - install-os - configure-os - - ssh-known-hosts + $GLOBAL - run-os - reboot-os - install-certs - - libvirt - - nova - ovn - neutron-metadata - - telemetry + - libvirt + - nova-$CELL + - telemetry <4> env: - name: ANSIBLE_CALLBACKS_ENABLED value: "profile_tasks" - name: 
ANSIBLE_FORCE_COLOR value: "True" - nodes: - standalone: - hostName: standalone <2> - ansible: - ansibleHost: ${computes[standalone.localdomain]} - networks: - - defaultRoute: true - fixedIP: ${computes[standalone.localdomain]} - name: ctlplane - subnetName: subnet1 - - name: internalapi - subnetName: subnet1 - - name: storage - subnetName: subnet1 - - name: tenant - subnetName: subnet1 + - name: ANSIBLE_VERBOSITY + value: 3 nodeTemplate: ansibleSSHPrivateKeySecret: dataplane-adoption-secret ansible: @@ -355,7 +475,7 @@ endif::[] # # These vars are for the network config templates themselves and are # considered EDPM network defaults. - neutron_physical_bridge_name: br-ctlplane + neutron_physical_bridge_name: br-ctlplane <5> neutron_public_interface_name: eth0 # edpm_nodes_validation @@ -363,7 +483,7 @@ endif::[] edpm_nodes_validation_validate_gateway_icmp: false # edpm ovn-controller configuration - edpm_ovn_bridge_mappings: <3> + edpm_ovn_bridge_mappings: <6> edpm_ovn_bridge: br-int edpm_ovn_encap_type: geneve ovn_monitor_all: true @@ -414,72 +534,102 @@ endif::[] # Do not attempt OVS major upgrades here edpm_ovs_packages: - openvswitch3.3 + nodes: EOF + cat computes-$CELL >> nodeset-${CELL}.yaml +done ---- + -<1> If TLS Everywhere is enabled, change `spec:tlsEnabled` to `true`. -<2> If your deployment has a custom DNS Domain, modify the `spec:nodes:[NODE NAME]:hostName` to use fqdn for the node. -<3> Replace `` with the value of the bridge mappings in your configuration, for example, `"datacentre:br-ctlplane"`. +<1> The networks composition must match the source cloud configuration to avoid dataplane connectivity downtime. The ctlplane network must come first. +<2> Use node sets names, like `openstack-cell1`, `openstack-cell2`. Only create node sets for cells containing compute nodes. +<3> If TLS Everywhere is enabled, change `spec.tlsEnabled` to `true`. +<4> If not adopting the telemetry services, omit it from the services list. 
+<5> The bridge name and other OVN and Neutron specific values must match the source cloud configuration to avoid dataplane connectivity downtime. +<6> Replace `` with the value of the bridge mappings in your configuration, for example, `"datacentre:br-ctlplane"`. + +[NOTE] +The global service `ssh-known-hosts` may only be defined for a single node set. -. Ensure that you use the same `ovn-controller` settings in the `OpenStackDataPlaneNodeSet` CR that you used in the {compute_service} nodes before adoption. This configuration is stored in the `external_ids` column in the `Open_vSwitch` table in the Open vSwitch database: +* Ensure that you use the same `ovn-controller` settings in the `OpenStackDataPlaneNodeSet` CR that you used in the {compute_service} nodes before adoption. This configuration is stored in the `external_ids` column in the `Open_vSwitch` table in the Open vSwitch database: + ---- -ovs-vsctl list Open . +$ ovs-vsctl list Open . ... external_ids : {hostname=standalone.localdomain, ovn-bridge=br-int, ovn-bridge-mappings=, ovn-chassis-mac-mappings="datacentre:1e:0a:bb:e6:7c:ad", ovn-encap-ip="172.19.0.100", ovn-encap-tos="0", ovn-encap-type=geneve, ovn-match-northd-version=False, ovn-monitor-all=True, ovn-ofctrl-wait-before-clear="8000", ovn-openflow-probe-interval="60", ovn-remote="tcp:ovsdbserver-sb.openstack.svc:6642", ovn-remote-probe-interval="60000", rundir="/var/run/openvswitch", system-id="2eec68e6-aa21-4c95-a868-31aeafc11736"} ... ---- + -* Replace `` with the value of the bridge mappings in your configuration, for example, `"datacentre:br-ctlplane"`. +Replace `` with the value of the bridge mappings in your configuration, for example, `"datacentre:br-ctlplane"` + +. Deploy the `OpenStackDataPlaneNodeSet` CRs for each Nova compute cell ++ +---- +$ for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + oc apply -f nodeset-${CELL}.yaml +done +---- . 
If you use a Ceph back end for {block_storage_first_ref}, prepare the adopted data plane workloads: + [source,yaml] ---- -$ oc patch osdpns/openstack-cell1 --type=merge --patch " -spec: - services: +$ for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + if [ "$CELL" = "cell1" ]; then + GLOBAL="- ssh-known-hosts" + else + GLOBAL=" " + fi + + oc patch osdpns/openstack-$CELL --type=merge --patch " + spec: + services: ifeval::["{build}" == "downstream"] - - redhat + - redhat endif::[] - - bootstrap - - download-cache - - configure-network - - validate-network - - install-os - - configure-os - - ssh-known-hosts - - run-os - - reboot-os - - ceph-client - - install-certs - - ovn - - neutron-metadata - - libvirt - - nova - - telemetry - nodeTemplate: - extraMounts: - - extraVolType: Ceph - volumes: - - name: ceph - secret: - secretName: ceph-conf-files - mounts: - - name: ceph - mountPath: "/etc/ceph" - readOnly: true -" + - bootstrap + - download-cache + - configure-network + - validate-network + - install-os + - ceph-hci-pre + - configure-os + $GLOBAL + - run-os + - reboot-os + - ceph-client + - install-certs + - ovn + - neutron-metadata + - libvirt + - nova-$CELL + - telemetry + nodeTemplate: + extraMounts: + - extraVolType: Ceph + volumes: + - name: ceph + secret: + secretName: ceph-conf-files + mounts: + - name: ceph + mountPath: "/etc/ceph" + readOnly: true + " +done ---- + [NOTE] -Ensure that you use the same list of services from the original `OpenStackDataPlaneNodeSet` CR, except for the inserted `ceph-client` service. +Ensure that you use the same list of services from the original `OpenStackDataPlaneNodeSet` CR, except for the inserted `ceph-client` and `ceph-hci-pre` services. . 
Optional: Enable `neutron-sriov-nic-agent` in the `OpenStackDataPlaneNodeSet` CR: + [source,yaml] ---- -$ oc patch openstackdataplanenodeset openstack-cell1 --type='json' --patch='[ +$ for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + oc patch openstackdataplanenodeset openstack-$CELL --type='json' --patch='[ { "op": "add", "path": "/spec/services/-", @@ -496,20 +646,23 @@ $ oc patch openstackdataplanenodeset openstack-cell1 --type='json' --patch='[ "op": "add", "path": "/spec/nodeTemplate/ansible/ansibleVars/edpm_neutron_sriov_agent_SRIOV_NIC_resource_provider_hypervisors", "value": "dummy-dev:standalone.localdomain" - } -]' + }]' + done ---- . Optional: Enable `neutron-dhcp` in the `OpenStackDataPlaneNodeSet` CR: + [source,yaml] ---- -$ oc patch openstackdataplanenodeset openstack-cell1 --type='json' --patch='[ +$ for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + oc patch openstackdataplanenodeset openstack-$CELL --type='json' --patch='[ { "op": "add", "path": "/spec/services/-", "value": "neutron-dhcp" }]' +done ---- + [NOTE] @@ -555,8 +708,7 @@ kind: OpenStackDataPlaneDeployment metadata: name: openstack-pre-adoption spec: - nodeSets: - - openstack + nodeSets: $NODESETS servicesOverride: - pre-adoption-validation EOF @@ -618,8 +770,7 @@ kind: OpenStackDataPlaneDeployment metadata: name: tripleo-cleanup spec: - nodeSets: - - openstack + nodeSets: $NODESETS servicesOverride: - tripleo-cleanup EOF @@ -635,8 +786,7 @@ kind: OpenStackDataPlaneDeployment metadata: name: openstack spec: - nodeSets: - - openstack + nodeSets: $NODESETS EOF ---- + @@ -656,10 +806,12 @@ $ watch oc get pod -l app=openstackansibleee $ oc logs -l app=openstackansibleee -f --max-log-requests 20 ---- -. Wait for the data plane node set to reach the `Ready` status: +. 
Wait for the data plane node sets to reach the `Ready` status: + ---- -$ oc wait --for condition=Ready osdpns/openstack-cell1 --timeout=30m +$ for CELL in $(echo $RENAMED_CELLS); do + oc wait --for condition=Ready osdpns/openstack-$CELL --timeout=30m +done ---- . Verify that the {networking_first_ref} agents are running: @@ -675,6 +827,12 @@ $ oc exec openstackclient -- openstack network agent list +--------------------------------------+------------------------------+------------------------+-------------------+-------+-------+----------------------------+ ---- +[NOTE] +==== +After the data plane adoption is completed, {OpenStackPreviousInstaller} cell controllers should be decommissioned. +To become new cell compute nodes, they must be re-provisioned, then scaled-out, or added into additional node sets of corresponding cells. +==== + .Next steps * You must perform a fast-forward upgrade on your Compute services. For more information, see xref:performing-a-fast-forward-upgrade-on-compute-services_{context}[Performing a fast-forward upgrade on Compute services]. diff --git a/docs_user/modules/proc_adopting-networker-services-to-the-data-plane.adoc b/docs_user/modules/proc_adopting-networker-services-to-the-data-plane.adoc index c6899258b..4928c6adf 100644 --- a/docs_user/modules/proc_adopting-networker-services-to-the-data-plane.adoc +++ b/docs_user/modules/proc_adopting-networker-services-to-the-data-plane.adoc @@ -55,7 +55,6 @@ endif::[] - validate-network - install-os - configure-os - - ssh-known-hosts - run-os - install-certs - ovn diff --git a/docs_user/modules/proc_adopting-the-compute-service.adoc b/docs_user/modules/proc_adopting-the-compute-service.adoc index b3dd649a4..763981c90 100644 --- a/docs_user/modules/proc_adopting-the-compute-service.adoc +++ b/docs_user/modules/proc_adopting-the-compute-service.adoc @@ -13,8 +13,22 @@ To adopt the {compute_service_first_ref}, you patch an existing `OpenStackContro * You have completed the previous adoption steps. 
* You have defined the following shell variables. Replace the following example values with the values that are correct for your environment: ---- -$ alias openstack="oc exec -t openstackclient -- openstack" +alias openstack="oc exec -t openstackclient -- openstack" + +DEFAULT_CELL_NAME="cell3" +RENAMED_CELLS="cell1 cell2 $DEFAULT_CELL_NAME" +---- ++ +The `default` cell takes a new name from `DEFAULT_CELL_NAME`. +In a multi-cell adoption scenario, it may retain its original 'default' name as well. + +A standalone TripleO only creates a default cell, so you should define that instead: ++ +---- +DEFAULT_CELL_NAME="cell1" +RENAMED_CELLS="cell1" ---- ++ .Procedure @@ -22,10 +36,40 @@ $ alias openstack="oc exec -t openstackclient -- openstack" + [NOTE] This procedure assumes that {compute_service} metadata is deployed on the top level and not on each cell level. If the {OpenStackShort} deployment has a per-cell metadata deployment, adjust the following patch as needed. You cannot run the metadata service in `cell0`. +To enable local cells metadata services, set `spec.nova.template.cellTemplates.cell*.metadataServiceTemplate.enable` in `OpenStackControlPlane` CR. 
+ [source,yaml] ---- -$ oc patch openstackcontrolplane openstack -n openstack --type=merge --patch ' +rm -f celltemplates +for CELL in $(echo $RENAMED_CELLS); do + cat >> celltemplates << EOF + ${CELL}: + hasAPIAccess: true + cellDatabaseAccount: nova-$CELL + cellDatabaseInstance: openstack-$CELL + cellMessageBusInstance: rabbitmq-$CELL + metadataServiceTemplate: + enabled: false # enable here to run it in a cell instead + override: + service: + metadata: + annotations: + metallb.universe.tf/address-pool: internalapi + metallb.universe.tf/allow-shared-ip: internalapi + metallb.universe.tf/loadBalancerIPs: 172.17.0.$(( 79 + ${CELL##*cell} )) + spec: + type: LoadBalancer + customServiceConfig: | + [workarounds] + disable_compute_service_check_for_ffu=true + conductorServiceTemplate: + customServiceConfig: | + [workarounds] + disable_compute_service_check_for_ffu=true +EOF +done + +cat > oscp-patch.yaml << EOF spec: nova: enabled: true @@ -33,6 +77,7 @@ spec: route: {} template: secret: osp-secret + apiDatabaseAccount: nova-api apiServiceTemplate: override: service: @@ -67,37 +112,25 @@ spec: disable_compute_service_check_for_ffu=true cellTemplates: cell0: + hasAPIAccess: true + cellDatabaseAccount: nova-cell0 + cellDatabaseInstance: openstack + cellMessageBusInstance: rabbitmq conductorServiceTemplate: customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=true - cell1: - metadataServiceTemplate: - enabled: false # enable here to run it in a cell instead - override: - service: - metadata: - annotations: - metallb.universe.tf/address-pool: internalapi - metallb.universe.tf/allow-shared-ip: internalapi - metallb.universe.tf/loadBalancerIPs: 172.17.0.80 - spec: - type: LoadBalancer - customServiceConfig: | - [workarounds] - disable_compute_service_check_for_ffu=true - conductorServiceTemplate: - customServiceConfig: | - [workarounds] - disable_compute_service_check_for_ffu=true -' +EOF +cat celltemplates >> oscp-patch.yaml +oc patch 
openstackcontrolplane openstack -n openstack --type=merge --patch-file=oscp-patch.yaml ---- ++ -. If you are adopting the {compute_service} with the {bare_metal_first_ref}, append the following `novaComputeTemplates` in the `cell1` section of the {compute_service} CR patch: +. If you are adopting the {compute_service} with the {bare_metal_first_ref}, append the following `novaComputeTemplates` in each `cellX` section of the {compute_service} CR patch: + [source,yaml] ---- - cell1: + cell: novaComputeTemplates: standalone: customServiceConfig: | @@ -105,9 +138,11 @@ spec: host = <hostname> [workarounds] disable_compute_service_check_for_ffu=true + computeDriver: ironic.IronicDriver + ... ---- + -* Replace <hostname> with the hostname of the node that is running the `ironic` Compute driver in the source cloud. +* Replace `<hostname>` with the hostname of the node that is running the `ironic` Compute driver in the source cloud. . Wait for the CRs for the Compute control plane services to be ready: + @@ -117,7 +152,7 @@ $ oc wait --for condition=Ready --timeout=300s Nova/nova + [NOTE] The local Conductor services are started for each cell, while the superconductor runs in `cell0`. -Note that `disable_compute_service_check_for_ffu` is mandatory for all imported Compute services until the external data plane is imported, and until Compute services are fast-forward upgraded. For more information, see xref:adopting-compute-services-to-the-data-plane_data-plane[Adopting Compute services to the {rhos_acro} data plane] and xref:performing-a-fast-forward-upgrade-on-compute-services_data-plane[Performing a fast-forward upgrade on Compute services]. +Note that `disable_compute_service_check_for_ffu` is mandatory for all imported Compute services until the external data plane is imported, and until Compute services are fast-forward upgraded. 
For more information, see xref:adopting-compute-services-to-the-data-plane_data-plane[Adopting Compute services to the {rhos_acro} data plane] and xref:performing-a-fast-forward-upgrade-on-compute-services_data-plane[Upgrading Compute services]. .Verification @@ -131,24 +166,33 @@ $ openstack server list + ** Compare the outputs with the topology-specific configuration in xref:proc_retrieving-topology-specific-service-configuration_migrating-databases[Retrieving topology-specific service configuration]. -* Query the superconductor to check that `cell1` exists, and compare it to pre-adoption values: +* Query the superconductor to check that the expected cells exist, and compare it to pre-adoption values: + ---- -set +u -. ~/.source_cloud_exported_variables_default -set -u -echo $PULL_OPENSTACK_CONFIGURATION_NOVAMANAGE_CELL_MAPPINGS -oc rsh nova-cell0-conductor-0 nova-manage cell_v2 list_cells | grep -F '| cell1 |' +$ for CELL in $(echo $CELLS); do + set +u + . ~/.source_cloud_exported_variables_$CELL + set -u + RCELL=$CELL + [ "$CELL" = "default" ] && RCELL=$DEFAULT_CELL_NAME + + echo "comparing $CELL to $RCELL" + echo $PULL_OPENSTACK_CONFIGURATION_NOVAMANAGE_CELL_MAPPINGS | grep -F "| $CELL |" + oc rsh nova-cell0-conductor-0 nova-manage cell_v2 list_cells | grep -F "| $RCELL |" +done ---- + -The following changes are expected: +The following changes are expected, for each cell `X`: + -** The `cell1` `nova` database and username become `nova_cell1`. -** The default cell is renamed to `cell1`. +** The `cellX` `nova` database and username become `nova_cellX`. +** The `default` cell is renamed to `DEFAULT_CELL_NAME` (it may retain the original name, if there are multiple cells). +** RabbitMQ transport URL no longer uses `guest`. ** RabbitMQ transport URL no longer uses `guest`. [NOTE] +==== At this point, the {compute_service} control plane services do not control the existing {compute_service} workloads. 
The control plane manages the data plane only after the data adoption process is completed. For more information, see xref:adopting-compute-services-to-the-data-plane_data-plane[Adopting Compute services to the {rhos_acro} data plane]. +==== [IMPORTANT] To import external Compute services to the {rhos_acro} data plane, you must upgrade them first. diff --git a/docs_user/modules/proc_configuring-data-plane-nodes.adoc b/docs_user/modules/proc_configuring-data-plane-nodes.adoc index 9ff5e2884..c213a34ce 100644 --- a/docs_user/modules/proc_configuring-data-plane-nodes.adoc +++ b/docs_user/modules/proc_configuring-data-plane-nodes.adoc @@ -18,7 +18,7 @@ kind: NetConfig metadata: name: netconfig spec: - networks: + networks: <1> - name: internalapi dnsDomain: internalapi.example.com subnets: @@ -47,6 +47,7 @@ spec: cidr: 172.19.0.0/24 vlan: 22 ---- +<1> The networks composition must match the source cloud configuration to avoid dataplane connectivity downtime. . Optional: In the `NetConfig` CR, list multiple ranges for the `allocationRanges` field to exclude some of the IP addresses, for example, to accommodate IP addresses that are already consumed by the adopted environment: + diff --git a/docs_user/modules/proc_deploying-backend-services.adoc b/docs_user/modules/proc_deploying-backend-services.adoc index 19dad6193..8045fb6ca 100644 --- a/docs_user/modules/proc_deploying-backend-services.adoc +++ b/docs_user/modules/proc_deploying-backend-services.adoc @@ -273,6 +273,14 @@ endif::[] secret: osp-secret replicas: 3 storageRequest: 5G + openstack-cell2: + secret: osp-secret + replicas: 1 + storageRequest: 5G + openstack-cell3: + secret: osp-secret + replicas: 1 + storageRequest: 5G memcached: enabled: true templates: @@ -352,6 +360,28 @@ ifeval::["{OpenStackPreviousInstaller}" == "director_operator"] endif::[] spec: type: LoadBalancer + rabbitmq-cell2: + persistence: + storage: 1G + override: + service: + metadata: + annotations: + metallb.universe.tf/address-pool: 
internalapi + metallb.universe.tf/loadBalancerIPs: 172.17.0.87 + spec: + type: LoadBalancer + rabbitmq-cell3: + persistence: + storage: 1G + override: + service: + metadata: + annotations: + metallb.universe.tf/address-pool: internalapi + metallb.universe.tf/loadBalancerIPs: 172.17.0.88 + spec: + type: LoadBalancer telemetry: enabled: false @@ -375,15 +405,16 @@ endif::[] + <1> Select an existing storage class in your {OpenShiftShort} cluster. -This example provides the required infrastructure database and messaging services for 1 Compute cell -named `cell1`. Adjust the names, counts, IP addresses, and numbers, such as `replicas`, `storage`, or `storageRequest`, as needed. +This example provides the required infrastructure database and messaging services for 3 Compute cells +named `cell1`, `cell2`, and `cell3`. Adjust the names, counts, IP addresses, and numbers, +such as `replicas`, `storage`, or `storageRequest`, as needed. .Verification * Verify that MariaDB and RabbitMQ are running for all defined cells: + ---- -$ RENAMED_CELLS="cell1" +$ RENAMED_CELLS="cell1 cell2 cell3" $ oc get pod openstack-galera-0 -o jsonpath='{.status.phase}{"\n"}' | grep Running $ oc get pod rabbitmq-server-0 -o jsonpath='{.status.phase}{"\n"}' | grep Running $ for CELL in $(echo $RENAMED_CELLS); do diff --git a/docs_user/modules/proc_migrating-databases-to-mariadb-instances.adoc b/docs_user/modules/proc_migrating-databases-to-mariadb-instances.adoc index e8b5a98b4..b81581c5d 100644 --- a/docs_user/modules/proc_migrating-databases-to-mariadb-instances.adoc +++ b/docs_user/modules/proc_migrating-databases-to-mariadb-instances.adoc @@ -26,9 +26,9 @@ $ STORAGE_CLASS=local-storage $ MARIADB_IMAGE=registry.redhat.io/rhosp-dev-preview/openstack-mariadb-rhel9:18.0 endif::[] -$ CELLS="default" <1> -$ DEFAULT_CELL_NAME="cell1" -$ RENAMED_CELLS="$DEFAULT_CELL_NAME" +$ CELLS="default cell1 cell2" <1> +$ DEFAULT_CELL_NAME="cell3" +$ RENAMED_CELLS="cell1 cell2 $DEFAULT_CELL_NAME" $ 
NAMESPACE="openstack" diff --git a/docs_user/modules/proc_performing-a-fast-forward-upgrade-on-compute-services.adoc b/docs_user/modules/proc_performing-a-fast-forward-upgrade-on-compute-services.adoc index c7aa5b953..9836ae454 100644 --- a/docs_user/modules/proc_performing-a-fast-forward-upgrade-on-compute-services.adoc +++ b/docs_user/modules/proc_performing-a-fast-forward-upgrade-on-compute-services.adoc @@ -8,13 +8,48 @@ You must upgrade the Compute services from {rhos_prev_long} {rhos_prev_ver} to { * Remove pre-fast-forward upgrade workarounds from the Compute control plane services and Compute data plane services. * Run Compute database online migrations to update live data. +.Prerequisites + +* Define the shell variables necessary to apply the fast-forward upgrade commands, for each Nova compute cell. ++ +---- +DEFAULT_CELL_NAME="cell3" +RENAMED_CELLS="cell1 cell2 $DEFAULT_CELL_NAME" + +declare -A PODIFIED_DB_ROOT_PASSWORD +for CELL in $(echo "super $RENAMED_CELLS"); do + PODIFIED_DB_ROOT_PASSWORD[$CELL]=$(oc get -o json secret/osp-secret | jq -r .data.DbRootPassword | base64 -d) +done + +NODESETS="" +for CELL in $(echo $RENAMED_CELLS); do + oc get Openstackdataplanenodeset openstack-${CELL} || continue + NODESETS="'openstack-${CELL}', $NODESETS" <1> +done +NODESETS="[${NODESETS%,*}]" + +NOVASERVICES="" +for CELL in $(echo $RENAMED_CELLS); do + NOVASERVICES="'nova-${CELL}', $NOVASERVICES" <2> +done +NOVASERVICES="[${NOVASERVICES%,*}]" +---- ++ +<1> Each dataplane node set name must match the name of the node set that you defined in its corresponding `OpenStackDataPlaneNodeSet` CR. +<2> Each dataplane service name must match the name of the service that you included in the `servicesOverride` key of its corresponding `OpenStackDataPlaneNodeSet` CR. + +[NOTE] +Here, the cell databases share the password defined in `osp-secret`. + .Procedure -. Wait for cell1 Compute data plane services version to update: +. 
Wait for {compute_service} data plane services version updated for all cells: + ---- -$ oc exec openstack-cell1-galera-0 -c galera -- mysql -rs -uroot -p$PODIFIED_DB_ROOT_PASSWORD \ - -e "select a.version from nova_cell1.services a join nova_cell1.services b where a.version!=b.version and a.binary='nova-compute';" +$ for CELL in $(echo $RENAMED_CELLS); do + oc exec openstack-$CELL-galera-0 -c galera -- mysql -rs -uroot -p"${PODIFIED_DB_ROOT_PASSWORD[$CELL]}" \ + -e "select a.version from nova_${CELL}.services a join nova_${CELL}.services b where a.version!=b.version and a.binary='nova-compute';" +done ---- + [NOTE] @@ -28,17 +63,10 @@ Review any errors in the nova Compute agent logs on the data plane, and the `nov + [source,yaml] ---- -$ oc patch openstackcontrolplane openstack -n openstack --type=merge --patch ' -spec: - nova: - template: - cellTemplates: - cell0: - conductorServiceTemplate: - customServiceConfig: | - [workarounds] - disable_compute_service_check_for_ffu=false - cell1: +$ rm -f celltemplates +$ for CELL in $(echo $RENAMED_CELLS); do + cat >> celltemplates << EOF + ${CELL}: metadataServiceTemplate: customServiceConfig: | [workarounds] @@ -47,6 +75,13 @@ spec: customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=false +EOF +done + +$ cat > oscp-patch.yaml << EOF +spec: + nova: + template: apiServiceTemplate: customServiceConfig: | [workarounds] @@ -59,7 +94,39 @@ spec: customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=false -' + cellTemplates: + cell0: + conductorServiceTemplate: + customServiceConfig: | + [workarounds] + disable_compute_service_check_for_ffu=false +EOF +$ cat celltemplates >> oscp-patch.yaml +---- ++ + +* If you are adopting the {compute_service} with the {bare_metal_first_ref}, append the following `novaComputeTemplates` in the needed `cell` section(s) of the {compute_service} CR patch: ++ +[source,yaml] +---- + cell: + novaComputeTemplates: + : <1> + customServiceConfig: | + 
[DEFAULT] + host = + [workarounds] + disable_compute_service_check_for_ffu=true + computeDriver: ironic.IronicDriver + ... +---- ++ +<1> Replace `` with the hostname of the node that is running the `ironic` Compute driver in the source cloud cell. + +. Apply the patch file ++ +---- +$ oc patch openstackcontrolplane openstack -n openstack --type=merge --patch-file=oscp-patch.yaml ---- . Wait until the Compute control plane services CRs are ready: @@ -74,49 +141,43 @@ $ oc wait --for condition=Ready --timeout=300s Nova/nova + [source,yaml] ---- -$ oc apply -f - < TRIPLEO_PASSWORDS[$CELL]="$PASSWORD_FILE" > done diff --git a/tests/roles/backend_services/templates/openstack_control_plane.j2 b/tests/roles/backend_services/templates/openstack_control_plane.j2 index ef5fb7723..498583433 100644 --- a/tests/roles/backend_services/templates/openstack_control_plane.j2 +++ b/tests/roles/backend_services/templates/openstack_control_plane.j2 @@ -82,6 +82,15 @@ spec: secret: osp-secret replicas: 1 storageRequest: 1Gi + # TODO(bogdando): iterate based on renamed_cells value in kustomization.yaml + openstack-cell2: + secret: osp-secret + replicas: 1 + storageRequest: 1Gi + openstack-cell3: + secret: osp-secret + replicas: 1 + storageRequest: 1Gi memcached: enabled: true @@ -142,6 +151,29 @@ spec: metallb.universe.tf/loadBalancerIPs: 172.17.0.86 spec: type: LoadBalancer + # TODO(bogdando): iterate based on renamed_cells value in kustomization.yaml + rabbitmq-cell2: + persistence: + storage: 1Gi + override: + service: + metadata: + annotations: + metallb.universe.tf/address-pool: internalapi + metallb.universe.tf/loadBalancerIPs: 172.17.0.87 + spec: + type: LoadBalancer + rabbitmq-cell3: + persistence: + storage: 1Gi + override: + service: + metadata: + annotations: + metallb.universe.tf/address-pool: internalapi + metallb.universe.tf/loadBalancerIPs: 172.17.0.88 + spec: + type: LoadBalancer telemetry: enabled: false diff --git a/tests/roles/common_defaults/defaults/main.yaml 
b/tests/roles/common_defaults/defaults/main.yaml index c2ce53153..1b71e649f 100644 --- a/tests/roles/common_defaults/defaults/main.yaml +++ b/tests/roles/common_defaults/defaults/main.yaml @@ -32,6 +32,42 @@ cells_env: | DEFAULT_CELL_NAME={{ default_cell_name }} RENAMED_CELLS="{{ renamed_cells | join(' ') }}" +# Header for osdp nodesets names evaluation +nodesets_env: | + {{ edpm_computes_shell_vars_src }} + + NODESETS="" + for CELL in $(echo $RENAMED_CELLS); do + ref="COMPUTES_$(echo ${CELL}|tr '[:lower:]' '[:upper:]')" + eval names=\${!${ref}[@]} + [ -z "$names" ] && continue + NODESETS="'openstack-${CELL}', $NODESETS" + done + NODESETS="[${NODESETS%,*}{% if edpm_nodes_networker is defined %}, 'openstack-networker'{% endif %}]" + +nodesets_env_oc: | + {{ shell_header }} + {{ oc_header }} + {{ cells_env }} + + NODESETS="" + for CELL in $(echo $RENAMED_CELLS); do + oc get Openstackdataplanenodeset openstack-${CELL} || continue + NODESETS="'openstack-${CELL}', $NODESETS" + done + NODESETS="[${NODESETS%,*}]" + +# Header for custom nova osdp services names evaluation +nova_services_env: | + {{ shell_header }} + {{ cells_env }} + + NOVASERVICES="" + for CELL in $(echo $RENAMED_CELLS); do + NOVASERVICES="'nova-${CELL}', $NOVASERVICES" + done + NOVASERVICES="[${NOVASERVICES%,*}]" + # Headers for DB client CLI image mariadb_image_env: | STORAGE_CLASS={{ storage_class_name }} @@ -122,6 +158,20 @@ mariadb_copy_shell_vars_dst: | fi done +# Header for the destination cloud EDPM Nova cell computes FQDN and IP pairs, per cell +edpm_computes_shell_vars_src: |- + {{ shell_header }} + {{ cells_env }} + + {% for cell in renamed_cells %} + declare -A COMPUTES_{{ cell.upper() }} + COMPUTES_{{ cell.upper() }}=( + {%- for v in edpm_nodes[cell] | default({}) %} + ["{{ edpm_nodes[cell][v].hostName }}"]={{ edpm_nodes[cell][v].ansible.ansibleHost }} + {% endfor -%} + ) + {% endfor %} + pull_openstack_configuration_ssh_shell_vars: | CONTROLLER1_SSH="{{ controller1_ssh }}" 
CONTROLLER2_SSH="{{ controller2_ssh }}" diff --git a/tests/roles/control_plane_rollback/defaults/main.yaml b/tests/roles/control_plane_rollback/defaults/main.yaml index 3fc557ca2..ebbe0c96a 100644 --- a/tests/roles/control_plane_rollback/defaults/main.yaml +++ b/tests/roles/control_plane_rollback/defaults/main.yaml @@ -1,2 +1,3 @@ +os_cloud_name: standalone control_plane_rollback_verify_command: | ssh root@{{ standalone_ip }} OS_CLOUD={{ os_cloud_name }} openstack user list diff --git a/tests/roles/dataplane_adoption/defaults/main.yaml b/tests/roles/dataplane_adoption/defaults/main.yaml index 4312ddbd4..5211c4b8f 100644 --- a/tests/roles/dataplane_adoption/defaults/main.yaml +++ b/tests/roles/dataplane_adoption/defaults/main.yaml @@ -66,7 +66,9 @@ image_tag: "current-podified" ansible_ssh_private_key_secret: dataplane-adoption-secret default_timesync_ntp_servers: - hostname: pool.ntp.org +# FIXME(bogdando): adapt for multi-cell or single-cell edpm_node_hostname: standalone.localdomain +edpm_node_ip: 192.168.122.100 edpm_user: root edpm_nodes: cell1: @@ -158,11 +160,12 @@ os_diff_data_dir: tmp/os-diff prelaunch_test_instance: true telemetry_adoption: true +# nodes data will be templated in by a separate task dataplane_cr: | apiVersion: dataplane.openstack.org/v1beta1 kind: OpenStackDataPlaneNodeSet metadata: - name: openstack-cell1 + name: openstack-$CELL spec: tlsEnabled: {{ enable_tlse }} networkAttachments: @@ -175,7 +178,7 @@ dataplane_cr: | - validate-network - install-os - configure-os - - ssh-known-hosts + $GLOBAL - run-os - reboot-os - install-certs @@ -183,7 +186,7 @@ dataplane_cr: | - neutron-metadata {%+ if compute_adoption|bool +%} - libvirt - - nova + - nova-$CELL {%+ endif +%} {% if telemetry_adoption|bool +%} - telemetry @@ -198,8 +201,7 @@ dataplane_cr: | - name: ANSIBLE_SSH_ARGS value: "-C -o ControlMaster=auto -o ControlPersist=80s" - name: ANSIBLE_VERBOSITY - value: "{{ dataplane_verbosity | default ('1') }}" - nodes: {{ edpm_nodes["cell1"] }} + value: 
"{{ dataplane_verbosity | default ('3') }}" nodeTemplate: ansibleSSHPrivateKeySecret: {{ ansible_ssh_private_key_secret }} ansible: @@ -276,6 +278,7 @@ dataplane_cr: | ovn_monitor_all: true edpm_ovn_remote_probe_interval: 60000 edpm_ovn_ofctrl_wait_before_clear: 8000 + nodes: dpa_dir: "../.." dpa_tests_dir: "{{ dpa_dir }}/tests" diff --git a/tests/roles/dataplane_adoption/tasks/main.yaml b/tests/roles/dataplane_adoption/tasks/main.yaml index 3a4d06b63..6d0a2f1b5 100644 --- a/tests/roles/dataplane_adoption/tasks/main.yaml +++ b/tests/roles/dataplane_adoption/tasks/main.yaml @@ -6,6 +6,7 @@ ceph_backend_configuration_fsid_shell_vars: | CEPH_FSID=$(oc get secret ceph-conf-files -o json | jq -r '.data."ceph.conf"' | base64 -d | grep fsid | sed -e 's/fsid = //') +# FIXME: missing docs coverage? - name: Patch openstackversion to use image built from source or latest if none is defined when: not skip_patching_ansibleee_csv | bool no_log: "{{ use_no_log }}" @@ -91,7 +92,7 @@ rm -f id* cd - -- name: create a Nova Compute Extra Config service (no ceph backend in use) +- name: create a configuration map which should become common for all cells (local storage back end) when: - compute_adoption|bool - ('ceph' not in [nova_libvirt_backend]) @@ -103,15 +104,15 @@ apiVersion: v1 kind: ConfigMap metadata: - name: nova-extra-config + name: nova-cells-global-config namespace: {{ rhoso_namespace }} data: - 19-nova-compute-cell1-workarounds.conf: | + 99-nova-compute-cells-workarounds.conf: | [workarounds] disable_compute_service_check_for_ffu=true EOF -- name: create a Nova Compute Extra Config service (ceph backend in use) +- name: create a configuration map which should become common for all cells (Ceph storage back end) when: - compute_adoption|bool - ('ceph' in [nova_libvirt_backend]) @@ -124,10 +125,10 @@ apiVersion: v1 kind: ConfigMap metadata: - name: nova-extra-config + name: nova-cells-global-config namespace: {{ rhoso_namespace }} data: - 
19-nova-compute-cell1-workarounds.conf: | + 99-nova-compute-cells-workarounds.conf: | [workarounds] disable_compute_service_check_for_ffu=true 03-ceph-nova.conf: | @@ -142,13 +143,107 @@ rbd_secret_uuid=$CEPH_FSID EOF -- name: Create OpenStackDataPlaneNodeSet +- name: create dataplane services for Nova cells to enable pre-upgrade workarounds + when: + - compute_adoption|bool + no_log: "{{ use_no_log }}" + ansible.builtin.shell: | + {{ shell_header }} + {{ oc_header }} + {{ cells_env }} + + for CELL in $(echo $RENAMED_CELLS); do + oc apply -f - < edpm-crd.yaml + {{ nodesets_env }} + + declare -A names + for CELL in $(echo $RENAMED_CELLS); do + ref="COMPUTES_$(echo ${CELL}|tr '[:lower:]' '[:upper:]')" + eval names=\${!${ref}[@]} + [ -z "$names" ] && continue + ind=0 + rm -f computes-$CELL + for compute in $names; do + ip="${ref}['$compute']" + cat >> computes-$CELL << EOF + ${compute}: + hostName: $compute + ansible: + ansibleHost: $compute + networks: + - defaultRoute: true + fixedIP: ${!ip} + name: ctlplane + subnetName: subnet1 + - name: internalapi + subnetName: subnet1 + - name: storage + subnetName: subnet1 + - name: tenant + subnetName: subnet1 + EOF + ind=$(( ind + 1 )) + done + + test -f computes-$CELL || continue + if [ "$CELL" = "cell1" ]; then + GLOBAL="- ssh-known-hosts" + else + GLOBAL=" " + fi + cat > nodeset-${CELL}.yaml <> nodeset-${CELL}.yaml + done + # NOTE(bogdando): omit computes-$CELL insertion as that is a manual operation only needed by docs. + # Those files are created here only to provide testing coverage of the commands provided in docs. + # Their contents is irrelevant as the real values come from edpm_nodes, by the below task. 
+ +- name: update EDPM nodes data in nodes sets of cells + no_log: "{{ use_no_log }}" + when: + - compute_adoption|bool + ansible.builtin.shell: | + {{ shell_header }} + {% for cell in renamed_cells %} + {% if cell in edpm_nodes %} + cat > computes-real-{{ cell }} << EOF + {% filter indent(width=4) %} + {{ edpm_nodes[cell] | to_yaml(indent=2) }} + {% endfilter %} + EOF + cat computes-real-{{ cell }} >> nodeset-{{ cell }}.yaml + {% endif %} + {% endfor %} - name: Create OpenStackDataPlaneNodeSet_networker when: edpm_nodes_networker is defined or edpm_networker_deploy @@ -159,71 +254,99 @@ {{ networker_cr }} EOF +# FIXME: this is different in docs, need to align with tests +# FIXME(bogdando): get ovs_external_ids.json data for multiple node sets - name: check ovs external-ids with os-diff before deployment + failed_when: false tags: pull_openstack_configuration no_log: "{{ use_no_log }}" ansible.builtin.shell: | {{ shell_header }} - {{ os_diff_dir }}/os-diff diff {{ os_diff_data_dir }}/tripleo/ovs_external_ids/standalone/ovs_external_ids.json edpm-crd.yaml --crd --service ovs_external_ids -f ${PWD}/{{ os_diff_dir }}/config.yaml + {{ cells_env }} + for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + {{ os_diff_dir }}/os-diff diff {{ os_diff_data_dir }}/tripleo/ovs_external_ids/standalone/ovs_external_ids.json nodeset-${CELL}.yaml --crd --service ovs_external_ids -f ${PWD}/{{ os_diff_dir }}/config.yaml + done -- name: deploy dataplane +- name: deploy the OpenStackDataPlaneNodeSet CRs for each Nova compute cell ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - cat edpm-crd.yaml | oc apply -f - + {{ cells_env }} + + for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + oc apply -f nodeset-${CELL}.yaml + done + {%+ if edpm_nodes_networker is defined or edpm_networker_deploy +%} cat edpm-crd-networker.yaml | oc apply -f - {%+ endif +%} -# TODO: Apply the ceph backend config for Cinder in the original 
openstack CR, via kustomize -- name: prepare adopted EDPM workloads to use Ceph backend for Cinder, if configured so +# TODO(bogdando): Apply the ceph backend config for Cinder in the original openstack CR, via kustomize perhaps? +- name: prepare the adopted data plane workloads to use Ceph backend for Cinder, if configured so no_log: "{{ use_no_log }}" when: - compute_adoption|bool - - cinder_volume_backend == "ceph" or cinder_backup_backend == "ceph" + - cinder_volume_backend == "ceph" or cinder_backup_backend == "ceph" or ('ceph' in [nova_libvirt_backend]) ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - oc patch osdpns/openstack-cell1 --type=merge --patch " - spec: - services: - - bootstrap - - download-cache - - configure-network - - validate-network - - install-os - - ceph-hci-pre - - configure-os - - ssh-known-hosts - - run-os - - reboot-os - - ceph-client - - ovn - - neutron-metadata - - libvirt - - nova - {% if telemetry_adoption|bool +%} - - telemetry - {%+ endif +%} - nodeTemplate: - extraMounts: - - extraVolType: Ceph - volumes: - - name: ceph - secret: - secretName: ceph-conf-files - mounts: - - name: ceph - mountPath: "/etc/ceph" - readOnly: true - " - -- name: set neutron-sriov-nic-agent configuration in the OpenStackDataPlaneNodeSet CR + {{ cells_env }} + + for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + if [ "$CELL" = "cell1" ]; then + GLOBAL="- ssh-known-hosts" + else + GLOBAL=" " + fi + oc patch osdpns/openstack-$CELL --type=merge --patch " + spec: + services: + - bootstrap + - download-cache + - configure-network + - validate-network + - install-os + - ceph-hci-pre + - configure-os + $GLOBAL + - run-os + - reboot-os + - install-certs + - ceph-client + - ovn + - neutron-metadata + - libvirt + - nova-$CELL + {% if telemetry_adoption|bool +%} + - telemetry + {%+ endif +%} + nodeTemplate: + extraMounts: + - extraVolType: Ceph + volumes: + - name: ceph + secret: + secretName: ceph-conf-files + mounts: 
+ - name: ceph + mountPath: "/etc/ceph" + readOnly: true + " + done + +- name: enable neutron-sriov-nic-agent in the OpenStackDataPlaneNodeSet CR no_log: "{{ use_no_log }}" ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - oc patch openstackdataplanenodeset openstack-cell1 --type='json' --patch='[ + {{ cells_env }} + + for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + oc patch openstackdataplanenodeset openstack-$CELL --type='json' --patch='[ { "op": "add", "path": "/spec/services/-", @@ -241,27 +364,33 @@ "path": "/spec/nodeTemplate/ansible/ansibleVars/edpm_neutron_sriov_agent_SRIOV_NIC_resource_provider_hypervisors", "value": "" }]' + done when: - edpm_neutron_sriov_agent_enabled|bool - compute_adoption|bool -- name: set neutron-dhcp configuration in the OpenStackDataPlaneNodeSet CR +- name: enable neutron-dhcp in the OpenStackDataPlaneNodeSet CR no_log: "{{ use_no_log }}" ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - oc patch openstackdataplanenodeset openstack-cell1 --type='json' --patch='[ + {{ cells_env }} + + for CELL in $(echo $RENAMED_CELLS); do + test -f nodeset-${CELL}.yaml || continue + oc patch openstackdataplanenodeset openstack-$CELL --type='json' --patch='[ { "op": "add", "path": "/spec/services/-", "value": "neutron-dhcp" }]' + done when: edpm_neutron_dhcp_agent_enabled|bool -- name: Run pre-adoption validation +- name: Run the pre-adoption validation when: run_pre_adoption_validation|bool block: - - name: Create OpenStackDataPlaneService/pre-adoption-validation + - name: create the validation service no_log: "{{ use_no_log }}" ansible.builtin.shell: | {{ shell_header }} @@ -275,27 +404,29 @@ playbook: osp.edpm.pre_adoption_validation EOF - - name: Create OpenStackDataPlaneDeployment to run the validation only + - name: create a OpenStackDataPlaneDeployment CR that runs only the validation no_log: "{{ use_no_log }}" ansible.builtin.shell: | {{ shell_header }} - {{ oc_header }} + {{ 
nodesets_env_oc }} + + {%+ if edpm_nodes_networker is defined or edpm_networker_deploy +%} + NODESETS="${NODESETS%]*},openstack-networker]" + {%+ endif +%} + oc apply -f - <> celltemplates << EOF + ${CELL}: metadataServiceTemplate: customServiceConfig: | [workarounds] @@ -33,6 +28,13 @@ customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=false + EOF + done + + cat > oscp-patch.yaml << EOF + spec: + nova: + template: apiServiceTemplate: customServiceConfig: | [workarounds] @@ -45,47 +47,58 @@ customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=false - ' + cellTemplates: + cell0: + conductorServiceTemplate: + customServiceConfig: | + [workarounds] + disable_compute_service_check_for_ffu=false + EOF + cat celltemplates >> oscp-patch.yaml + +- name: Apply the patch file + ansible.builtin.shell: | + {{ shell_header }} + {{ oc_header }} + oc patch openstackcontrolplane openstack -n openstack --type=merge --patch-file=oscp-patch.yaml -- name: Wait for Nova control plane services' CRs to become ready +- name: wait until the Compute control plane services CRs are ready ansible.builtin.include_role: name: nova_adoption tasks_from: wait.yaml -- name: remove pre-FFU workarounds for Nova compute EDPM services +- name: remove the pre-fast-forward upgrade workarounds from the Compute data plane services ansible.builtin.shell: | {{ shell_header }} - {{ oc_header }} - oc apply -f - <> celltemplates << EOF + ${CELL}: + hasAPIAccess: true + cellDatabaseAccount: nova-$CELL + cellDatabaseInstance: openstack-$CELL + cellMessageBusInstance: rabbitmq-$CELL + metadataServiceTemplate: + enabled: false # enable here to run it in a cell instead + override: + service: + metadata: + annotations: + metallb.universe.tf/address-pool: internalapi + metallb.universe.tf/allow-shared-ip: internalapi + metallb.universe.tf/loadBalancerIPs: 172.17.0.$(( 79 + ${CELL##*cell} )) + spec: + type: LoadBalancer + customServiceConfig: | + [workarounds] + 
disable_compute_service_check_for_ffu=true + conductorServiceTemplate: + customServiceConfig: | + [workarounds] + disable_compute_service_check_for_ffu=true + EOF + done + + cat > oscp-patch.yaml << EOF spec: nova: enabled: true @@ -10,6 +41,7 @@ nova_libvirt_patch: | route: {} template: secret: osp-secret + apiDatabaseAccount: nova-api apiServiceTemplate: override: service: @@ -44,30 +76,19 @@ nova_libvirt_patch: | disable_compute_service_check_for_ffu=true cellTemplates: cell0: + hasAPIAccess: true + cellDatabaseAccount: nova-cell0 + cellDatabaseInstance: openstack + cellMessageBusInstance: rabbitmq conductorServiceTemplate: customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=true - cell1: - metadataServiceTemplate: - enabled: false # enable here to run it in a cell instead - override: - service: - metadata: - annotations: - metallb.universe.tf/address-pool: internalapi - metallb.universe.tf/allow-shared-ip: internalapi - metallb.universe.tf/loadBalancerIPs: 172.17.0.80 - spec: - type: LoadBalancer - customServiceConfig: | - [workarounds] - disable_compute_service_check_for_ffu=true - conductorServiceTemplate: - customServiceConfig: | - [workarounds] - disable_compute_service_check_for_ffu=true + EOF + cat celltemplates >> oscp-patch.yaml +# NOTE(bogdando): no exact commands provided in docs for nova-ironic, +# so we can use ansible/jinja2 features to simplify testing these nova_ironic_patch: | spec: nova: @@ -76,6 +97,7 @@ nova_ironic_patch: | route: {} template: secret: osp-secret + apiDatabaseAccount: nova-api apiServiceTemplate: override: service: @@ -114,16 +136,23 @@ nova_ironic_patch: | customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=true - cell1: + {%+ for cell in renamed_cells +%} + {{ cell }}: + hasAPIAccess: true + cellDatabaseAccount: nova-cell{{ loop.index }} + cellDatabaseInstance: openstack-cell{{ loop.index }} + cellMessageBusInstance: rabbitmq-cell{{ loop.index }} conductorServiceTemplate: 
customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=true + {%+ if ironic_adoption|bool and cell in source_ironic_nodes +%} novaComputeTemplates: - standalone: + {%+ for n in source_ironic_nodes[cell] +%} + {{ n.template }}: customServiceConfig: | [DEFAULT] - host = standalone.localdomain + host = {{ n.name }} [workarounds] disable_compute_service_check_for_ffu=true replicas: 1 @@ -131,6 +160,9 @@ nova_ironic_patch: | computeDriver: ironic.IronicDriver networkAttachments: - internalapi + {%+ endfor +%} + {%+ endif +%} + {%+ endfor +%} remove_ffu_workaround_patch: | spec: @@ -154,15 +186,26 @@ remove_ffu_workaround_patch: | customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=false - cell1: + {%+ for cell in renamed_cells +%} + {{ cell }}: conductorServiceTemplate: customServiceConfig: | [workarounds] disable_compute_service_check_for_ffu=false + {%+ if ironic_adoption|bool and cell in source_ironic_nodes +%} novaComputeTemplates: - standalone: + {%+ for n in source_ironic_nodes[cell] +%} + {{ n.template }}: customServiceConfig: | [DEFAULT] - host = standalone.localdomain + host = {{ n.name }} [workarounds] disable_compute_service_check_for_ffu=false + replicas: 1 + resources: {} + computeDriver: ironic.IronicDriver + networkAttachments: + - internalapi + {%+ endfor +%} + {%+ endif +%} + {%+ endfor +%} diff --git a/tests/roles/nova_adoption/tasks/nova_ironic.yaml b/tests/roles/nova_adoption/tasks/nova_ironic.yaml index e5d826166..4ac04d3f8 100644 --- a/tests/roles/nova_adoption/tasks/nova_ironic.yaml +++ b/tests/roles/nova_adoption/tasks/nova_ironic.yaml @@ -2,20 +2,19 @@ ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - oc patch openstackcontrolplane openstack -n openstack --type=merge --patch '{{ nova_ironic_patch}}' + oc patch openstackcontrolplane openstack -n openstack --type=merge --patch '{{ nova_ironic_patch }}' - -- name: wait for Nova control plane services' CRs to become ready +- name: wait until the 
Compute control plane services CRs are ready ansible.builtin.include_tasks: file: wait.yaml -- name: Remove FFU workarounds +- name: remove the pre-fast-forward upgrade workarounds from the Compute data plane services ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} oc patch openstackcontrolplane openstack -n openstack --type=merge --patch '{{ remove_ffu_workaround_patch }}' -- name: wait for Nova control plane services' CRs to become ready +- name: wait until the Compute control plane services CRs are ready ansible.builtin.include_tasks: file: wait.yaml diff --git a/tests/roles/nova_adoption/tasks/nova_libvirt.yaml b/tests/roles/nova_adoption/tasks/nova_libvirt.yaml index 7b290f7de..64fea33e4 100644 --- a/tests/roles/nova_adoption/tasks/nova_libvirt.yaml +++ b/tests/roles/nova_adoption/tasks/nova_libvirt.yaml @@ -2,7 +2,8 @@ ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - oc patch openstackcontrolplane openstack -n openstack --type=merge --patch '{{ nova_libvirt_patch }}' + {{ nova_libvirt_patch }} + oc patch openstackcontrolplane openstack -n openstack --type=merge --patch-file=oscp-patch.yaml - name: wait for Nova control plane services' CRs to become ready ansible.builtin.include_tasks: @@ -13,17 +14,19 @@ file: check_endpoints.yaml # TODO(bogdando): provide automated checks for 'The expected changes to happen' -- name: query the superconductor for cell1 existance and compare it to pre-adoption values +- name: query the superconductor to check that the expected cells exist, and compare it to pre-adoption values ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} - {% if pulled_openstack_configuration_shell_headers is defined %} - {{ pulled_openstack_configuration_shell_headers }} - {% else %} - set +u - . ~/.source_cloud_exported_variables_default - set -u - {% endif %} + {{ cells_env }} + for CELL in $(echo $CELLS); do + set +u + . 
~/.source_cloud_exported_variables_$CELL + set -u + RCELL=$CELL + [ "$CELL" = "default" ] && RCELL=$DEFAULT_CELL_NAME - echo $PULL_OPENSTACK_CONFIGURATION_NOVAMANAGE_CELL_MAPPINGS - oc rsh nova-cell0-conductor-0 nova-manage cell_v2 list_cells | grep -F '| cell1 |' + echo "comparing $CELL to $RCELL" + echo $PULL_OPENSTACK_CONFIGURATION_NOVAMANAGE_CELL_MAPPINGS | grep -F "| $CELL |" + oc rsh nova-cell0-conductor-0 nova-manage cell_v2 list_cells | grep -F "| $RCELL |" + done diff --git a/tests/roles/nova_adoption/tasks/wait.yaml b/tests/roles/nova_adoption/tasks/wait.yaml index 77a6d8e3c..7d59ed0c5 100644 --- a/tests/roles/nova_adoption/tasks/wait.yaml +++ b/tests/roles/nova_adoption/tasks/wait.yaml @@ -1,6 +1,6 @@ # NOTE(bogdando): Status phase 'Running' doesn't necessarily mean it IS running in fact. # Instead, wait for CR Ready status -- name: wait for Nova control plane services' CRs to become ready +- name: wait until the Compute control plane services CRs are ready ansible.builtin.shell: | {{ shell_header }} {{ oc_header }} diff --git a/tests/roles/ovn_adoption/tasks/main.yaml b/tests/roles/ovn_adoption/tasks/main.yaml index 3792bfe78..2b21c07c0 100644 --- a/tests/roles/ovn_adoption/tasks/main.yaml +++ b/tests/roles/ovn_adoption/tasks/main.yaml @@ -146,9 +146,9 @@ {{ oc_header }} {{ ovn_copy_shell_vars }} - $CONTROLLER1_SSH sudo systemctl stop tripleo_ovn_cluster_northd.service - $CONTROLLER2_SSH sudo systemctl stop tripleo_ovn_cluster_northd.service - $CONTROLLER3_SSH sudo systemctl stop tripleo_ovn_cluster_northd.service + $CONTROLLER1_SSH if sudo systemctl is-active tripleo_ovn_cluster_northd.service ';' then sudo systemctl stop tripleo_ovn_cluster_northd.service ';' fi + $CONTROLLER2_SSH if sudo systemctl is-active tripleo_ovn_cluster_northd.service ';' then sudo systemctl stop tripleo_ovn_cluster_northd.service ';' fi + $CONTROLLER3_SSH if sudo systemctl is-active tripleo_ovn_cluster_northd.service ';' then sudo systemctl stop 
tripleo_ovn_cluster_northd.service ';' fi # If ovn_adoption is done using scenario A (different networks between podified # and tripleo deployments) in order to be able to dump OVN database an iptable @@ -270,10 +270,10 @@ {{ oc_header }} {{ ovn_copy_shell_vars }} - $CONTROLLER1_SSH sudo systemctl stop tripleo_ovn_cluster_north_db_server.service - $CONTROLLER2_SSH sudo systemctl stop tripleo_ovn_cluster_north_db_server.service - $CONTROLLER3_SSH sudo systemctl stop tripleo_ovn_cluster_north_db_server.service + $CONTROLLER1_SSH if sudo systemctl is-active tripleo_ovn_cluster_north_db_server.service ';' then sudo systemctl stop tripleo_ovn_cluster_north_db_server.service ';' fi + $CONTROLLER2_SSH if sudo systemctl is-active tripleo_ovn_cluster_north_db_server.service ';' then sudo systemctl stop tripleo_ovn_cluster_north_db_server.service ';' fi + $CONTROLLER3_SSH if sudo systemctl is-active tripleo_ovn_cluster_north_db_server.service ';' then sudo systemctl stop tripleo_ovn_cluster_north_db_server.service ';' fi - $CONTROLLER1_SSH sudo systemctl stop tripleo_ovn_cluster_south_db_server.service - $CONTROLLER2_SSH sudo systemctl stop tripleo_ovn_cluster_south_db_server.service - $CONTROLLER3_SSH sudo systemctl stop tripleo_ovn_cluster_south_db_server.service + $CONTROLLER1_SSH if sudo systemctl is-active tripleo_ovn_cluster_south_db_server.service ';' then sudo systemctl stop tripleo_ovn_cluster_south_db_server.service ';' fi + $CONTROLLER2_SSH if sudo systemctl is-active tripleo_ovn_cluster_south_db_server.service ';' then sudo systemctl stop tripleo_ovn_cluster_south_db_server.service ';' fi + $CONTROLLER3_SSH if sudo systemctl is-active tripleo_ovn_cluster_south_db_server.service ';' then sudo systemctl stop tripleo_ovn_cluster_south_db_server.service ';' fi diff --git a/tests/vars.sample.yaml b/tests/vars.sample.yaml index 4d08c07ef..03290cc86 100644 --- a/tests/vars.sample.yaml +++ b/tests/vars.sample.yaml @@ -16,19 +16,67 @@ source_galera_members: 
source_mariadb_ip: default: 172.17.0.2 #CUSTOMIZE_THIS +# EDPM nodes info, for each cell compute (omitting dedicated cell controllers) on the destination cloud. # To enable TLS-E, the standalone hostname must be set to standalone.ooo.test -edpm_node_hostname: standalone.localdomain +# Defaults provided for a single-cell case. +# Provide an entry for each cell on the target cloud, taking the default_cell_name value into account. +# The defined 'networks' connections must match netconfig_networks which manages NetConfig CR +edpm_nodes: + cell1: + standalone: + hostName: standalone.localdomain + ansible: + ansibleHost: 192.168.122.100 + networks: + - defaultRoute: true + fixedIP: 192.168.122.100 + name: ctlplane + subnetName: subnet1 + - name: internalapi + subnetName: subnet2 + - name: storage + subnetName: subnet3 + - name: tenant + subnetName: subnet4 + - name: storagemgmt + subnetName: subnet5 # TODO: There is no reason to change the domain depending on the type of # deployment, but we are doing this to keep the CI green when TLS-E is merged. # This setting should at some point be switched in the CI to standalone.ooo.test # for all types of jobs and removed entirely afterwards. +# For a local libvirt setup outside of CI-framework, enable EDPM net config and define netconfig_networks. +# That is required, at the very least, to update the DNS config in resolv.conf so that EDPM nodes can reach the OCP pods. 
+dataplane_os_net_config_set_route: false #CUSTOMIZE_THIS +netconfig_networks: #CUSTOMIZE_THIS + - name: ctlplane + dnsDomain: ctlplane.example.com + subnets: + - name: subnet1 + - name: internalapi + dnsDomain: internalapi.example.com + subnets: + - name: subnet2 + - name: storage + dnsDomain: storage.example.com + subnets: + - name: subnet3 + - name: tenant + dnsDomain: tenant.example.com + subnets: + - name: subnet4 + - name: storagemgmt + dnsDomain: storagemgmt.example.com + subnets: + - name: subnet5 + # If 'true', this flag will create a Barbican secret before the adoption runs # and after the adoption it'll be verified with the secret tills exists with # the same payload. For this flag to work with 'true' value, Barbican should be # available before the adoption prelaunch_barbican_secret: false + # Whether to use 'make crc_storage_cleanup; make crc_storage' before the test reset_crc_storage: true @@ -57,9 +105,6 @@ source_os_diff_config_ip: 192.168.122.100 # Source OVN DB IP for DB exports. source_ovndb_ip: 192.168.122.100 #CUSTOMIZE_THIS -# EDPM node IP -edpm_node_ip: 192.168.122.100 #CUSTOMIZE_THIS - # NTP servers list timesync_ntp_servers: # - clock.redhat.com # Will not work outside of RH intranet @@ -70,6 +115,11 @@ auth_url: http://keystone-public-openstack.apps-crc.testing # Set this to true if adopting the ironic services (ironic + ironic-inspector + nova w/compute-ironic) ironic_adoption: false +# Provide the source cloud Ironic topology for any cells with Ironic services +source_ironic_nodes: + default: + - name: standalone.localdomain + template: standalone # Run pre-adoption validation before the deploying run_pre_adoption_validation: true @@ -94,6 +144,9 @@ supported_backup_backends: #CUSTOMIZE_THIS # Whether the adopted node will host compute services compute_adoption: true +# For a multi-node deployment, this should be 'overcloud' +os_cloud_name: standalone + # Where perform or not telemetry installation during adoption telemetry_adoption: true