diff --git a/api/v1alpha3/conversion_test.go b/api/v1alpha3/conversion_test.go index 8a9b4932f6..e878f67e63 100644 --- a/api/v1alpha3/conversion_test.go +++ b/api/v1alpha3/conversion_test.go @@ -199,6 +199,7 @@ func TestFuzzyConversion(t *testing.T) { v1alpha5ClusterSpec.APIServerFixedIP = "" v1alpha5ClusterSpec.AllowAllInClusterTraffic = false v1alpha5ClusterSpec.DisableAPIServerFloatingIP = false + v1alpha5ClusterSpec.IgnoreAvailabilityZones = false }, func(v1alpha5MachineSpec *infrav1.OpenStackMachineSpec, c fuzz.Continue) { c.FuzzNoCustom(v1alpha5MachineSpec) diff --git a/api/v1alpha3/zz_generated.conversion.go b/api/v1alpha3/zz_generated.conversion.go index 0fe68a9881..f42bd618b4 100644 --- a/api/v1alpha3/zz_generated.conversion.go +++ b/api/v1alpha3/zz_generated.conversion.go @@ -733,6 +733,7 @@ func autoConvert_v1alpha5_OpenStackClusterSpec_To_v1alpha3_OpenStackClusterSpec( if err := Convert_v1beta1_APIEndpoint_To_v1alpha3_APIEndpoint(&in.ControlPlaneEndpoint, &out.ControlPlaneEndpoint, s); err != nil { return err } + // WARNING: in.IgnoreAvailabilityZones requires manual conversion: does not exist in peer-type out.ControlPlaneAvailabilityZones = *(*[]string)(unsafe.Pointer(&in.ControlPlaneAvailabilityZones)) if in.Bastion != nil { in, out := &in.Bastion, &out.Bastion diff --git a/api/v1alpha4/conversion_test.go b/api/v1alpha4/conversion_test.go index dd502b5aaa..1accdd7693 100644 --- a/api/v1alpha4/conversion_test.go +++ b/api/v1alpha4/conversion_test.go @@ -318,6 +318,12 @@ func TestFuzzyConversion(t *testing.T) { v1alpha5RootVolume.VolumeType = "" v1alpha5RootVolume.AvailabilityZone = "" }, + // Don't test hub-spoke-hub conversion of v1alpha5 fields which are not in v1alpha4 + func(v1alpha5ClusterSpec *infrav1.OpenStackClusterSpec, c fuzz.Continue) { + c.FuzzNoCustom(v1alpha5ClusterSpec) + + v1alpha5ClusterSpec.IgnoreAvailabilityZones = false + }, } } diff --git a/api/v1alpha4/zz_generated.conversion.go b/api/v1alpha4/zz_generated.conversion.go index 
5e684eca67..016e2ef8e9 100644 --- a/api/v1alpha4/zz_generated.conversion.go +++ b/api/v1alpha4/zz_generated.conversion.go @@ -867,6 +867,7 @@ func autoConvert_v1alpha5_OpenStackClusterSpec_To_v1alpha4_OpenStackClusterSpec( out.DisablePortSecurity = in.DisablePortSecurity out.Tags = *(*[]string)(unsafe.Pointer(&in.Tags)) out.ControlPlaneEndpoint = in.ControlPlaneEndpoint + // WARNING: in.IgnoreAvailabilityZones requires manual conversion: does not exist in peer-type out.ControlPlaneAvailabilityZones = *(*[]string)(unsafe.Pointer(&in.ControlPlaneAvailabilityZones)) if in.Bastion != nil { in, out := &in.Bastion, &out.Bastion diff --git a/api/v1alpha5/openstackcluster_types.go b/api/v1alpha5/openstackcluster_types.go index 06f4e5f465..bd6758e14d 100644 --- a/api/v1alpha5/openstackcluster_types.go +++ b/api/v1alpha5/openstackcluster_types.go @@ -123,6 +123,14 @@ type OpenStackClusterSpec struct { // +optional ControlPlaneEndpoint clusterv1.APIEndpoint `json:"controlPlaneEndpoint"` + // IgnoreAvailabilityZones disables the use of availability zones/failure domains, + // allowing Nova to schedule machines in any compatible AZ. + // This only affects control plane nodes - explicit failure domains can still be + // specified for worker nodes using MachineDeployment.spec.template.spec.failureDomain + // if desired. 
+ // +optional + IgnoreAvailabilityZones bool `json:"ignoreAvailabilityZones"` + // ControlPlaneAvailabilityZones is the az to deploy control plane to ControlPlaneAvailabilityZones []string `json:"controlPlaneAvailabilityZones,omitempty"` diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclusters.yaml index 85c47240d3..a9d363c804 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclusters.yaml @@ -3063,6 +3063,13 @@ spec: - kind - name type: object + ignoreAvailabilityZones: + description: IgnoreAvailabilityZones disables the use of availability + zones/failure domains, allowing Nova to schedule machines in any + compatible AZ. This only affects control plane nodes - explicit + failure domains can still be specified for worker nodes using MachineDeployment.spec.template.spec.failureDomain + if desired. + type: boolean managedSecurityGroups: description: ManagedSecurityGroups determines whether OpenStack security groups for the cluster will be managed by the OpenStack provider diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclustertemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclustertemplates.yaml index 65226a1deb..11110be9a1 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclustertemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_openstackclustertemplates.yaml @@ -1344,6 +1344,14 @@ spec: - kind - name type: object + ignoreAvailabilityZones: + description: IgnoreAvailabilityZones disables the use of availability + zones/failure domains, allowing Nova to schedule machines + in any compatible AZ. This only affects control plane nodes + - explicit failure domains can still be specified for worker + nodes using MachineDeployment.spec.template.spec.failureDomain + if desired. 
+ type: boolean managedSecurityGroups: description: ManagedSecurityGroups determines whether OpenStack security groups for the cluster will be managed by the OpenStack diff --git a/controllers/openstackcluster_controller.go b/controllers/openstackcluster_controller.go index a71f82dae7..1f0b3157f3 100644 --- a/controllers/openstackcluster_controller.go +++ b/controllers/openstackcluster_controller.go @@ -264,28 +264,30 @@ func reconcileNormal(ctx context.Context, scope *scope.Scope, patchHelper *patch return reconcile.Result{}, err } - availabilityZones, err := computeService.GetAvailabilityZones() - if err != nil { - return ctrl.Result{}, err - } - - // Create a new list to remove any Availability - // Zones that have been removed from OpenStack + // Create a new list to remove any Availability Zones from our status that no longer exist in OpenStack openStackCluster.Status.FailureDomains = make(clusterv1.FailureDomains) - for _, az := range availabilityZones { - found := true - // If Az given, then check whether it's in the allow list - // If no Az given, then by default put into allow list - if len(openStackCluster.Spec.ControlPlaneAvailabilityZones) > 0 { - if contains(openStackCluster.Spec.ControlPlaneAvailabilityZones, az.ZoneName) { - found = true - } else { - found = false - } + // Only populate the failure domains if we want them to be considered + if !openStackCluster.Spec.IgnoreAvailabilityZones { + availabilityZones, err := computeService.GetAvailabilityZones() + if err != nil { + return ctrl.Result{}, err } - openStackCluster.Status.FailureDomains[az.ZoneName] = clusterv1.FailureDomainSpec{ - ControlPlane: found, + for _, az := range availabilityZones { + found := true + // If Az given, then check whether it's in the allow list + // If no Az given, then by default put into allow list + if len(openStackCluster.Spec.ControlPlaneAvailabilityZones) > 0 { + if contains(openStackCluster.Spec.ControlPlaneAvailabilityZones, az.ZoneName) { + found = true + } else 
{ + found = false + } + } + + openStackCluster.Status.FailureDomains[az.ZoneName] = clusterv1.FailureDomainSpec{ + ControlPlane: found, + } } } diff --git a/controllers/openstackmachine_controller.go b/controllers/openstackmachine_controller.go index 2680ef6931..b226ac4f77 100644 --- a/controllers/openstackmachine_controller.go +++ b/controllers/openstackmachine_controller.go @@ -421,10 +421,6 @@ func machineToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, machine * return nil, fmt.Errorf("create Options need be specified to create instace") } - if machine.Spec.FailureDomain == nil { - return nil, fmt.Errorf("failure domain not set") - } - instanceSpec := compute.InstanceSpec{ Name: openStackMachine.Name, Image: openStackMachine.Spec.Image, @@ -434,13 +430,17 @@ func machineToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, machine * UserData: userData, Metadata: openStackMachine.Spec.ServerMetadata, ConfigDrive: openStackMachine.Spec.ConfigDrive != nil && *openStackMachine.Spec.ConfigDrive, - FailureDomain: *machine.Spec.FailureDomain, RootVolume: openStackMachine.Spec.RootVolume, Subnet: openStackMachine.Spec.Subnet, ServerGroupID: openStackMachine.Spec.ServerGroupID, Trunk: openStackMachine.Spec.Trunk, } + // Add the failure domain only if specified + if machine.Spec.FailureDomain != nil { + instanceSpec.FailureDomain = *machine.Spec.FailureDomain + } + machineTags := []string{} // Append machine specific tags diff --git a/docs/book/src/clusteropenstack/configuration.md b/docs/book/src/clusteropenstack/configuration.md index 394bad98d6..d35aef5428 100644 --- a/docs/book/src/clusteropenstack/configuration.md +++ b/docs/book/src/clusteropenstack/configuration.md @@ -107,9 +107,17 @@ Note: you need to set `clusterctl.cluster.x-k8s.io/move` label for the secret cr ## Availability zone -The availability zone names must be exposed as an environment variable `OPENSTACK_FAILURE_DOMAIN`. 
+When using the provided templates, the availability zone name must be exposed as an environment variable `OPENSTACK_FAILURE_DOMAIN`. -By default, if `Availability zone` is not given, all `Availability zone` that defined in openstack will be a candidate to provision from, If administrator credential is used then `internal` Availability zone which is internal only Availability zone inside `nova` will be returned and can cause potential problem, see [PR 1165](https://github.com/kubernetes-sigs/cluster-api-provider-openstack/pull/1165) for further information. So we highly recommend to set `Availability zone` explicitly. +However, it is possible to provision clusters without using explicit availability zones, allowing Nova to select appropriate availability zones based on other scheduling constraints (e.g. flavor traits, host aggregates). + +For machine deployments (e.g. worker nodes), this can be achieved simply by not specifying a `failureDomain` in the machine template spec. For the control plane, the default behaviour is to explicitly schedule control plane nodes across all the availability zones. To disable this behaviour and allow control plane nodes _without_ explicit availability zones, you must set `OpenStackCluster.spec.ignoreAvailabilityZones: true`. + +> **WARNING** +> +> If an administrator credential is used, then the `internal` availability zone will be returned, which can cause issues - see [PR 1165](https://github.com/kubernetes-sigs/cluster-api-provider-openstack/pull/1165) for further information. +> +> We highly recommend **not** using an administrator credential, but if it is required, you should set your availability zones explicitly using `OpenStackCluster.spec.controlPlaneAvailabilityZones` and `MachineDeployment.spec.template.spec.failureDomain`. ## DNS server