From dcd162b8c9b50891d2f100340caf11ff678490c5 Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Mon, 13 May 2024 17:10:16 +0300 Subject: [PATCH] feat(occm): support multi region cluster Currently, it supports only single auth section. Set the regions in config as: [Global] region=REGION1 regions=REGION1 regions=REGION2 regions=REGION3 --- ...sing-openstack-cloud-controller-manager.md | 6 +- pkg/client/client.go | 17 ++ pkg/csi/cinder/openstack/openstack.go | 12 +- pkg/csi/cinder/openstack/openstack_test.go | 9 + pkg/openstack/instances.go | 182 ++++++++++++------ pkg/openstack/openstack.go | 9 +- 6 files changed, 175 insertions(+), 60 deletions(-) diff --git a/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md b/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md index 1cfce3135d..0f9c8068fb 100644 --- a/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md +++ b/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md @@ -113,7 +113,9 @@ The options in `Global` section are used for openstack-cloud-controller-manager * `password` Keystone user password. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required. * `region` - Required. Keystone region name. + Required. Keystone region name. The name of region will be set in the `topology.kubernetes.io/region` label of the node. +* `regions` + Optional. Keystone region name, which is used to specify regions for the cloud provider where the instance is running. Can be specified multiple times.The region name may or may not be the same as the region name in the `region` option. They merge together at runtime. * `domain-id` Keystone user domain ID. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required. * `domain-name` @@ -317,7 +319,7 @@ Although the openstack-cloud-controller-manager was initially implemented with N call](https://docs.openstack.org/api-ref/load-balancer/v2/?expanded=create-a-load-balancer-detail#creating-a-fully-populated-load-balancer). Setting this option to true will create loadbalancers using serial API calls which first create an unpopulated loadbalancer, then populate its listeners, pools and members. This is a compatibility option at the expense of - increased load on the OpenStack API. Default: false + increased load on the OpenStack API. Default: false NOTE: diff --git a/pkg/client/client.go b/pkg/client/client.go index e9be5312b2..486abafcab 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -23,6 +23,7 @@ import ( "fmt" "net/http" "runtime" + "slices" "strings" "github.com/gophercloud/gophercloud/v2" @@ -53,6 +54,7 @@ type AuthOpts struct { UserDomainID string `gcfg:"user-domain-id" mapstructure:"user-domain-id" name:"os-userDomainID" value:"optional"` UserDomainName string `gcfg:"user-domain-name" mapstructure:"user-domain-name" name:"os-userDomainName" value:"optional"` Region string `name:"os-region"` + Regions []string `name:"os-regions" value:"optional"` EndpointType gophercloud.Availability `gcfg:"os-endpoint-type" mapstructure:"os-endpoint-type" name:"os-endpointType" value:"optional"` CAFile string `gcfg:"ca-file" mapstructure:"ca-file" name:"os-certAuthorityPath" value:"optional"` TLSInsecure string `gcfg:"tls-insecure" mapstructure:"tls-insecure" name:"os-TLSInsecure" value:"optional" matches:"^true|false$"` @@ -87,6 +89,7 @@ func LogCfg(authOpts AuthOpts) { klog.V(5).Infof("UserDomainID: %s", authOpts.UserDomainID) klog.V(5).Infof("UserDomainName: %s", authOpts.UserDomainName) klog.V(5).Infof("Region: %s", authOpts.Region) + klog.V(5).Infof("Regions: %s", authOpts.Regions) klog.V(5).Infof("EndpointType: %s", authOpts.EndpointType) klog.V(5).Infof("CAFile: %s", authOpts.CAFile) klog.V(5).Infof("CertFile: %s", authOpts.CertFile) @@ -232,6 +235,20 @@ func ReadClouds(authOpts *AuthOpts) error { authOpts.ApplicationCredentialName = replaceEmpty(authOpts.ApplicationCredentialName, cloud.AuthInfo.ApplicationCredentialName) authOpts.ApplicationCredentialSecret = replaceEmpty(authOpts.ApplicationCredentialSecret, cloud.AuthInfo.ApplicationCredentialSecret) + regions := strings.Split(authOpts.Region, ",") + if len(regions) > 1 { + authOpts.Region = regions[0] + } + + for _, r := range cloud.Regions { + // Support only single auth section in clouds.yaml + if r.Values.AuthInfo == nil && r.Name != authOpts.Region && !slices.Contains(regions, r.Name) { + regions = append(regions, r.Name) + } + } + + authOpts.Regions = regions + return nil } diff --git a/pkg/csi/cinder/openstack/openstack.go b/pkg/csi/cinder/openstack/openstack.go index 26f96dd01b..7ed28e3671 100644 --- a/pkg/csi/cinder/openstack/openstack.go +++ b/pkg/csi/cinder/openstack/openstack.go @@ -20,6 +20,7 @@ import ( "fmt" "net/http" "os" + "slices" "github.com/gophercloud/gophercloud/v2" "github.com/gophercloud/gophercloud/v2/openstack" @@ -126,7 +127,7 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) { } } - for _, global := range cfg.Global { + for idx, global := range cfg.Global { // Update the config with data from clouds.yaml if UseClouds is enabled if global.UseClouds { if global.CloudsFile != "" { @@ -138,6 +139,15 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) { } klog.V(5).Infof("Credentials are loaded from %s:", global.CloudsFile) } + + regions := []string{global.Region} + for _, region := range cfg.Global[idx].Regions { + if !slices.Contains(regions, region) { + regions = append(regions, region) + } + } + + cfg.Global[idx].Regions = regions } return cfg, nil diff --git a/pkg/csi/cinder/openstack/openstack_test.go b/pkg/csi/cinder/openstack/openstack_test.go index 647787e9bb..966591fddd 100644 --- a/pkg/csi/cinder/openstack/openstack_test.go +++ b/pkg/csi/cinder/openstack/openstack_test.go @@ -68,6 +68,7 @@ tenant-id=` + fakeTenantID + ` domain-id=` + fakeDomainID + ` ca-file=` + fakeCAfile + ` region=` + fakeRegion + ` +regions=` + fakeRegion + ` [Global "cloud2"] username=` + fakeUserName_cloud2 + ` password=` + fakePassword_cloud2 + ` @@ -76,6 +77,8 @@ tenant-id=` + fakeTenantID_cloud2 + ` domain-id=` + fakeDomainID_cloud2 + ` ca-file=` + fakeCAfile_cloud2 + ` region=` + fakeRegion_cloud2 + ` +regions=` + fakeRegion_cloud2 + ` +regions=` + fakeRegion_cloud2 + ` [Global "cloud3"] username=` + fakeUserName_cloud3 + ` password=` + fakePassword_cloud3 + ` @@ -112,6 +115,7 @@ rescan-on-resize=true` CAFile: fakeCAfile, TenantID: fakeTenantID, Region: fakeRegion, + Regions: []string{fakeRegion}, } expectedOpts.Global["cloud2"] = &client.AuthOpts{ Username: fakeUserName_cloud2, @@ -121,6 +125,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud2, TenantID: fakeTenantID_cloud2, Region: fakeRegion_cloud2, + Regions: []string{fakeRegion_cloud2}, } expectedOpts.Global["cloud3"] = &client.AuthOpts{ Username: fakeUserName_cloud3, @@ -130,6 +135,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud3, TenantID: fakeTenantID_cloud3, Region: fakeRegion_cloud3, + Regions: []string{fakeRegion_cloud3}, } expectedOpts.BlockStorage.RescanOnResize = true @@ -224,6 +230,7 @@ rescan-on-resize=true` CAFile: fakeCAfile, TenantID: fakeTenantID, Region: fakeRegion, + Regions: []string{fakeRegion}, EndpointType: gophercloud.AvailabilityPublic, UseClouds: true, CloudsFile: wd + "/fixtures/clouds.yaml", @@ -237,6 +244,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud2, TenantID: fakeTenantID_cloud2, Region: fakeRegion_cloud2, + Regions: []string{fakeRegion_cloud2}, EndpointType: gophercloud.AvailabilityPublic, UseClouds: true, CloudsFile: wd + "/fixtures/clouds.yaml", @@ -250,6 +258,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud3, TenantID: fakeTenantID_cloud3, Region: fakeRegion_cloud3, + Regions: []string{fakeRegion_cloud3}, EndpointType: gophercloud.AvailabilityPublic, UseClouds: true, CloudsFile: wd + "/fixtures/clouds.yaml", diff --git a/pkg/openstack/instances.go b/pkg/openstack/instances.go index f9b617736f..5917be1c4a 100644 --- a/pkg/openstack/instances.go +++ b/pkg/openstack/instances.go @@ -21,6 +21,7 @@ import ( "fmt" sysos "os" "regexp" + "slices" "strings" "github.com/gophercloud/gophercloud/v2" @@ -46,9 +47,9 @@ const ( // InstancesV2 encapsulates an implementation of InstancesV2 for OpenStack. type InstancesV2 struct { - compute *gophercloud.ServiceClient - network *gophercloud.ServiceClient - region string + compute map[string]*gophercloud.ServiceClient + network map[string]*gophercloud.ServiceClient + regions []string regionProviderID bool networkingOpts NetworkingOpts } @@ -57,16 +58,25 @@ type InstancesV2 struct { func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) { klog.V(4).Info("openstack.Instancesv2() called") - compute, err := client.NewComputeV2(os.provider, os.epOpts) - if err != nil { - klog.Errorf("unable to access compute v2 API : %v", err) - return nil, false - } + var err error + compute := make(map[string]*gophercloud.ServiceClient, len(os.regions)) + network := make(map[string]*gophercloud.ServiceClient, len(os.regions)) - network, err := client.NewNetworkV2(os.provider, os.epOpts) - if err != nil { - klog.Errorf("unable to access network v2 API : %v", err) - return nil, false + for _, region := range os.regions { + opt := os.epOpts + opt.Region = region + + compute[region], err = client.NewComputeV2(os.provider, opt) + if err != nil { + klog.Errorf("unable to access compute v2 API : %v", err) + return nil, false + } + + network[region], err = client.NewNetworkV2(os.provider, opt) + if err != nil { + klog.Errorf("unable to access network v2 API : %v", err) + return nil, false + } } regionalProviderID := false @@ -77,7 +87,7 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) { return &InstancesV2{ compute: compute, network: network, - region: os.epOpts.Region, + regions: os.regions, regionProviderID: regionalProviderID, networkingOpts: os.networkingOpts, }, true @@ -85,9 +95,15 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) { // InstanceExists indicates whether a given node exists according to the cloud provider func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) { - _, err := i.getInstance(ctx, node) + klog.V(4).InfoS("openstack.InstanceExists() called", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) + + _, _, err := i.getInstance(ctx, node) if err == cloudprovider.InstanceNotFound { - klog.V(6).Infof("instance not found for node: %s", node.Name) + klog.V(6).InfoS("Node is not found in cloud provider", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) return false, nil } @@ -100,7 +116,11 @@ func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, // InstanceShutdown returns true if the instance is shutdown according to the cloud provider. func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) { - server, err := i.getInstance(ctx, node) + klog.V(4).InfoS("openstack.InstanceShutdown() called", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) + + server, _, err := i.getInstance(ctx, node) if err != nil { return false, err } @@ -115,7 +135,11 @@ func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool // InstanceMetadata returns the instance's metadata. func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) { - srv, err := i.getInstance(ctx, node) + klog.V(4).InfoS("openstack.InstanceMetadata() called", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) + + srv, region, err := i.getInstance(ctx, node) if err != nil { return nil, err } @@ -124,17 +148,17 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo server = *srv } - instanceType, err := srvInstanceType(ctx, i.compute, &server) + instanceType, err := srvInstanceType(ctx, i.compute[region], &server) if err != nil { return nil, err } - ports, err := getAttachedPorts(ctx, i.network, server.ID) + ports, err := getAttachedPorts(ctx, i.network[region], server.ID) if err != nil { return nil, err } - addresses, err := nodeAddresses(ctx, &server, ports, i.network, i.networkingOpts) + addresses, err := nodeAddresses(ctx, &server, ports, i.network[region], i.networkingOpts) if err != nil { return nil, err } @@ -142,76 +166,122 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo availabilityZone := util.SanitizeLabel(server.AvailabilityZone) return &cloudprovider.InstanceMetadata{ - ProviderID: i.makeInstanceID(&server), + ProviderID: i.makeInstanceID(&server, region), InstanceType: instanceType, NodeAddresses: addresses, Zone: availabilityZone, - Region: i.region, + Region: region, }, nil } -func (i *InstancesV2) makeInstanceID(srv *servers.Server) string { +func (i *InstancesV2) makeInstanceID(srv *servers.Server, region string) string { if i.regionProviderID { - return fmt.Sprintf("%s://%s/%s", ProviderName, i.region, srv.ID) + return fmt.Sprintf("%s://%s/%s", ProviderName, region, srv.ID) } return fmt.Sprintf("%s:///%s", ProviderName, srv.ID) } -func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, error) { - if node.Spec.ProviderID == "" { - return getServerByName(ctx, i.compute, node.Name) +func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, string, error) { + var instanceID, instanceRegion string + + if node.Spec.ProviderID != "" { + var err error + + instanceID, instanceRegion, err = instanceIDFromProviderID(node.Spec.ProviderID) + if err != nil { + return nil, "", err + } } - instanceID, instanceRegion, err := instanceIDFromProviderID(node.Spec.ProviderID) - if err != nil { - return nil, err + if instanceRegion != "" { + if slices.Contains(i.regions, instanceRegion) { + return i.getInstanceByID(ctx, instanceID, []string{instanceRegion}) + } + + return nil, "", fmt.Errorf("getInstance: ProviderID \"%s\" didn't match supported regions \"%s\"", node.Spec.ProviderID, strings.Join(i.regions, ",")) } - if instanceRegion != "" && instanceRegion != i.region { - return nil, fmt.Errorf("ProviderID \"%s\" didn't match supported region \"%s\"", node.Spec.ProviderID, i.region) + // At this point we know that ProviderID is not properly set or it doesn't contain region information + // We need to search for the instance in all regions + var searchRegions []string + + // We cannot trust the region label, so we need to check the region + instanceRegion = node.Labels[v1.LabelTopologyRegion] + if slices.Contains(i.regions, instanceRegion) { + searchRegions = []string{instanceRegion} } + for _, r := range i.regions { + if r != instanceRegion { + searchRegions = append(searchRegions, r) + } + } + + klog.V(4).InfoS("openstack.getInstance() trying to find the instance in regions", "node", klog.KObj(node), + "instanceID", instanceID, + "regions", strings.Join(searchRegions, ",")) + + if instanceID == "" { + return i.getInstanceByName(ctx, node.Name, searchRegions) + } + + return i.getInstanceByID(ctx, instanceID, searchRegions) +} + +func (i *InstancesV2) getInstanceByID(ctx context.Context, instanceID string, searchRegions []string) (*servers.Server, string, error) { + server := servers.Server{} + mc := metrics.NewMetricContext("server", "get") - server, err := servers.Get(ctx, i.compute, instanceID).Extract() - if mc.ObserveRequest(err) != nil { - if errors.IsNotFound(err) { - return nil, cloudprovider.InstanceNotFound + for _, r := range searchRegions { + err := servers.Get(ctx, i.compute[r], instanceID).ExtractInto(&server) + if mc.ObserveRequest(err) != nil { + if errors.IsNotFound(err) { + continue + } + + return nil, "", err } - return nil, err + + return &server, r, nil } - return server, nil + + return nil, "", cloudprovider.InstanceNotFound } -func getServerByName(ctx context.Context, client *gophercloud.ServiceClient, name string) (*servers.Server, error) { +func (i *InstancesV2) getInstanceByName(ctx context.Context, name string, searchRegions []string) (*servers.Server, string, error) { opts := servers.ListOpts{ Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(name)), } serverList := make([]servers.Server, 0, 1) - mc := metrics.NewMetricContext("server", "list") - pager := servers.List(client, opts) - err := pager.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) { - s, err := servers.ExtractServers(page) - if err != nil { - return false, err + for _, r := range searchRegions { + pager := servers.List(i.compute[r], opts) + + err := pager.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) { + s, err := servers.ExtractServers(page) + if err != nil { + return false, err + } + serverList = append(serverList, s...) + if len(serverList) > 1 { + return false, errors.ErrMultipleResults + } + return true, nil + }) + if mc.ObserveRequest(err) != nil { + return nil, "", err } - serverList = append(serverList, s...) - if len(serverList) > 1 { - return false, errors.ErrMultipleResults + + if len(serverList) == 0 { + continue } - return true, nil - }) - if mc.ObserveRequest(err) != nil { - return nil, err - } - if len(serverList) == 0 { - return nil, errors.ErrNotFound + return &serverList[0], r, nil } - return &serverList[0], nil + return nil, "", cloudprovider.InstanceNotFound } // If Instances.InstanceID or cloudprovider.GetInstanceProviderID is changed, the regexp should be changed too. diff --git a/pkg/openstack/openstack.go b/pkg/openstack/openstack.go index 8450bc6009..441201785c 100644 --- a/pkg/openstack/openstack.go +++ b/pkg/openstack/openstack.go @@ -111,7 +111,7 @@ type LoadBalancerOpts struct { MonitorMaxRetriesDown uint `gcfg:"monitor-max-retries-down"` ManageSecurityGroups bool `gcfg:"manage-security-groups"` InternalLB bool `gcfg:"internal-lb"` // default false - NodeSelector string `gcfg:"node-selector"` // If specified, the loadbalancer members will be assined only from nodes list filtered by node-selector labels + NodeSelector string `gcfg:"node-selector"` // If specified, the loadbalancer members will be assigned only from nodes list filtered by node-selector labels CascadeDelete bool `gcfg:"cascade-delete"` FlavorID string `gcfg:"flavor-id"` AvailabilityZone string `gcfg:"availability-zone"` @@ -151,6 +151,7 @@ type RouterOpts struct { // OpenStack is an implementation of cloud provider Interface for OpenStack. type OpenStack struct { + regions []string provider *gophercloud.ProviderClient epOpts *gophercloud.EndpointOpts lbOpts LoadBalancerOpts @@ -247,6 +248,11 @@ func ReadConfig(config io.Reader) (Config, error) { klog.V(5).Infof("Config, loaded from the %s:", cfg.Global.CloudsFile) client.LogCfg(cfg.Global) } + + if len(cfg.Global.Regions) == 0 { + cfg.Global.Regions = []string{cfg.Global.Region} + } + // Set the default values for search order if not set if cfg.Metadata.SearchOrder == "" { cfg.Metadata.SearchOrder = fmt.Sprintf("%s,%s", metadata.ConfigDriveID, metadata.MetadataID) @@ -293,6 +299,7 @@ func NewOpenStack(cfg Config) (*OpenStack, error) { provider.HTTPClient.Timeout = cfg.Metadata.RequestTimeout.Duration os := OpenStack{ + regions: cfg.Global.Regions, provider: provider, epOpts: &gophercloud.EndpointOpts{ Region: cfg.Global.Region,