From 09be88b4d0eed6f22565cb57abd4cbee857afcbd Mon Sep 17 00:00:00 2001 From: Ludwig Bedacht Date: Wed, 10 Jan 2024 17:56:21 +0100 Subject: [PATCH] updated lan creation logic --- api/v1alpha1/ionoscloudcluster_types.go | 2 +- api/v1alpha1/ionoscloudmachine_types.go | 18 ++ go.mod | 2 +- go.sum | 2 + .../ionoscloudcluster_controller.go | 8 +- .../ionoscloudmachine_controller.go | 106 ++++++-- internal/ionoscloud/client/client.go | 7 +- internal/service/{ => cloud}/datacenter.go | 4 +- internal/service/cloud/network.go | 226 ++++++++++++++++++ internal/service/cloud/request.go | 35 +++ internal/service/{ => cloud}/service.go | 18 +- internal/service/network.go | 167 ------------- scope/cluster.go | 6 +- scope/machine.go | 32 +++ 14 files changed, 420 insertions(+), 213 deletions(-) rename internal/service/{ => cloud}/datacenter.go (91%) create mode 100644 internal/service/cloud/network.go create mode 100644 internal/service/cloud/request.go rename internal/service/{ => cloud}/service.go (64%) delete mode 100644 internal/service/network.go diff --git a/api/v1alpha1/ionoscloudcluster_types.go b/api/v1alpha1/ionoscloudcluster_types.go index 9f4fc65b..35e6bd49 100644 --- a/api/v1alpha1/ionoscloudcluster_types.go +++ b/api/v1alpha1/ionoscloudcluster_types.go @@ -23,7 +23,7 @@ import ( const ( // ClusterFinalizer allows cleanup of resources, which are - // associated with the IonosCloudCluster before removing it from the apiserver. + // associated with the IonosCloudCluster before removing it from the API server. ClusterFinalizer = "ionoscloudcluster.infrastructure.cluster.x-k8s.io" // IonosCloudClusterReady is the condition for the IonosCloudCluster, which indicates that the cluster is ready. 
diff --git a/api/v1alpha1/ionoscloudmachine_types.go b/api/v1alpha1/ionoscloudmachine_types.go index b95e9bb4..37009e23 100644 --- a/api/v1alpha1/ionoscloudmachine_types.go +++ b/api/v1alpha1/ionoscloudmachine_types.go @@ -25,6 +25,24 @@ import ( const ( // IonosCloudMachineType is the named type for the API object. IonosCloudMachineType = "IonosCloudMachine" + + // MachineFinalizer is the finalizer for the IonosCloudMachine resources. + // It will prevent the deletion of the resource until the finalizer has been removed by the controller + // to ensure that related cloud resources will be deleted before the IonosCloudMachine resource + // will be removed from the API server. + MachineFinalizer = "ionoscloudmachine.infrastructure.cluster.x-k8s.io" + + // MachineProvisionedCondition documents the status of the provisioning of an IonosCloudMachine and + // the underlying enterprise VM. + MachineProvisionedCondition clusterv1.ConditionType = "MachineProvisioned" + + // WaitingForClusterInfrastructureReason (Severity=Info) indicates that the IonosCloudMachine is currently + // waiting for the cluster infrastructure to become ready. + WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure" + + // WaitingForBootstrapDataReason (Severity=Info) indicates that the bootstrap provider has not yet finished + // creating the bootstrap data secret and storing it in the Cluster API Machine. + WaitingForBootstrapDataReason = "WaitingForBootstrapData" ) // VolumeDiskType specifies the type of hard disk. 
diff --git a/go.mod b/go.mod index be55218c..88dd44c5 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( k8s.io/apimachinery v0.28.4 k8s.io/client-go v0.28.4 k8s.io/klog/v2 v2.110.1 - k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 + k8s.io/utils v0.0.0-20240102154912-e7106e64919e sigs.k8s.io/cluster-api v1.6.0 sigs.k8s.io/controller-runtime v0.16.3 ) diff --git a/go.sum b/go.sum index 55af2adf..15b19b7e 100644 --- a/go.sum +++ b/go.sum @@ -291,6 +291,8 @@ k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5Ohx k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9/go.mod h1:wZK2AVp1uHCp4VamDVgBP2COHZjqD1T68Rf0CM3YjSM= k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 h1:qY1Ad8PODbnymg2pRbkyMT/ylpTrCM8P2RJ0yroCyIk= k8s.io/utils v0.0.0-20230406110748-d93618cff8a2/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ= +k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/cluster-api v1.6.0 h1:2bhVSnUbtWI8taCjd9lGiHExsRUpKf7Z1fXqi/IwYx4= sigs.k8s.io/cluster-api v1.6.0/go.mod h1:LB7u/WxiWj4/bbpHNOa1oQ8nq0MQ5iYlD0pGfRSBGLI= sigs.k8s.io/controller-runtime v0.16.3 h1:2TuvuokmfXvDUamSx1SuAOO3eTyye+47mJCigwG62c4= diff --git a/internal/controller/ionoscloudcluster_controller.go b/internal/controller/ionoscloudcluster_controller.go index a943ebb7..944ad60e 100644 --- a/internal/controller/ionoscloudcluster_controller.go +++ b/internal/controller/ionoscloudcluster_controller.go @@ -104,11 +104,7 @@ func (r *IonosCloudClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re // Make sure to persist the changes to the cluster before exiting the function. 
defer func() { if err := clusterScope.Finalize(); err != nil { - if retErr != nil { - retErr = errors.Join(err, retErr) - return - } - retErr = err + retErr = errors.Join(err, retErr) } }() @@ -121,9 +117,7 @@ func (r *IonosCloudClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re //nolint:unparam func (r *IonosCloudClusterReconciler) reconcileNormal(_ context.Context, clusterScope *scope.ClusterScope) (ctrl.Result, error) { - // TODO(lubedacht): setup cloud resources which are required before we create the machines controllerutil.AddFinalizer(clusterScope.IonosCluster, infrav1.ClusterFinalizer) - conditions.MarkTrue(clusterScope.IonosCluster, infrav1.IonosCloudClusterReady) clusterScope.IonosCluster.Status.Ready = true diff --git a/internal/controller/ionoscloudmachine_controller.go b/internal/controller/ionoscloudmachine_controller.go index 4bece503..9dde779b 100644 --- a/internal/controller/ionoscloudmachine_controller.go +++ b/internal/controller/ionoscloudmachine_controller.go @@ -18,7 +18,12 @@ package controller import ( "context" + "errors" "fmt" + "github.com/ionos-cloud/cluster-api-provider-ionoscloud/internal/service/cloud" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "time" "github.com/go-logr/logr" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -33,7 +38,6 @@ import ( infrav1 "github.com/ionos-cloud/cluster-api-provider-ionoscloud/api/v1alpha1" "github.com/ionos-cloud/cluster-api-provider-ionoscloud/internal/ionoscloud" - "github.com/ionos-cloud/cluster-api-provider-ionoscloud/internal/service" "github.com/ionos-cloud/cluster-api-provider-ionoscloud/scope" ) @@ -94,11 +98,11 @@ func (r *IonosCloudMachineReconciler) Reconcile(ctx context.Context, req ctrl.Re logger = logger.WithValues("cluster", klog.KObj(cluster)) - infraCluster, err := r.getInfraCluster(ctx, &logger, cluster, ionosCloudMachine) + clusterScope, err := r.getClusterScope(ctx, &logger, cluster, 
ionosCloudMachine) if err != nil { return ctrl.Result{}, fmt.Errorf("error getting infra provider cluster or control plane object: %w", err) } - if infraCluster == nil { + if clusterScope == nil { logger.Info("ionos cloud machine is not ready yet") return ctrl.Result{}, nil } @@ -108,7 +112,7 @@ func (r *IonosCloudMachineReconciler) Reconcile(ctx context.Context, req ctrl.Re Client: r.Client, Cluster: cluster, Machine: machine, - InfraCluster: infraCluster, + InfraCluster: clusterScope, IonosCloudMachine: ionosCloudMachine, Logger: &logger, }) @@ -117,37 +121,97 @@ func (r *IonosCloudMachineReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, err } - //// Always close the scope when exiting this function, so we can persist any ProxmoxMachine changes. - // defer func() { - // if err := machineScope.Close(); err != nil && reterr == nil { - // reterr = err - // } - // }() + defer func() { + if err := machineScope.Finalize(); err != nil { + reterr = errors.Join(err, reterr) + } + }() - machineService, err := service.NewMachineService(ctx, machineScope) + cloudService, err := cloud.NewService(ctx, machineScope) if err != nil { return ctrl.Result{}, fmt.Errorf("could not create machine service") } if !ionosCloudMachine.ObjectMeta.DeletionTimestamp.IsZero() { - return r.reconcileDelete(machineService) + return r.reconcileDelete(cloudService) } - return r.reconcileNormal(machineService) + return r.reconcileNormal(machineScope, clusterScope, cloudService) } -func (r *IonosCloudMachineReconciler) reconcileNormal(machineService *service.MachineService) (ctrl.Result, error) { - lan, err := machineService.GetLAN() - if err != nil { - return ctrl.Result{}, fmt.Errorf("could not reconcile LAN: %w", err) +func (r *IonosCloudMachineReconciler) isInfrastructureReady(machineScope *scope.MachineScope) bool { + // Make sure the infrastructure is ready. 
+ if !machineScope.Cluster.Status.InfrastructureReady { + machineScope.Info("Cluster infrastructure is not ready yet") + conditions.MarkFalse( + machineScope.IonosCloudMachine, + infrav1.MachineProvisionedCondition, + infrav1.WaitingForClusterInfrastructureReason, + clusterv1.ConditionSeverityInfo, "") + + return false } - if lan == nil { - return ctrl.Result{Requeue: true}, nil + + // Make sure to wait until the data secret was created + if machineScope.Machine.Spec.Bootstrap.DataSecretName == nil { + machineScope.Info("Boostrap data secret is not available yet") + conditions.MarkFalse( + machineScope.IonosCloudMachine, + infrav1.MachineProvisionedCondition, + infrav1.WaitingForBootstrapDataReason, + clusterv1.ConditionSeverityInfo, "", + ) + + return false + } + + return true +} + +func (r *IonosCloudMachineReconciler) reconcileNormal(machineScope *scope.MachineScope, _ *scope.ClusterScope, cloudService *cloud.Service) (ctrl.Result, error) { + machineScope.V(4).Info("Reconciling IonosCloudMachine") + + if machineScope.HasFailed() { + machineScope.Info("Error state detected, skipping reconciliation") + return ctrl.Result{}, nil } + if !r.isInfrastructureReady(machineScope) { + return ctrl.Result{}, nil + } + + if controllerutil.AddFinalizer(machineScope.IonosCloudMachine, infrav1.MachineFinalizer) { + if err := machineScope.PatchObject(); err != nil { + machineScope.Error(err, "unable to update finalizer on object") + return ctrl.Result{}, err + } + } + + // TODO(lubedacht) Check before starting reconciliation if there is any pending request in the Ionos cluster or machine spec + // If there is, query for the request and check the status + // Status: + // * Done = Clear request from the status and continue reconciliation + // * Queued, Running => Requeue the current request + // * Failed => We need to discuss this, log error and continue (retry last request in the corresponding reconcile function) + + // Ensure that a lan is created in the datacenter + if requeue, 
err := cloudService.ReconcileLAN(); err != nil || requeue { + if requeue { + return ctrl.Result{RequeueAfter: time.Second * 30}, err + } + return ctrl.Result{}, fmt.Errorf("could not reconcile LAN %w", err) + } + + //if err != nil { + // return ctrl.Result{}, fmt.Errorf("could not reconcile LAN: %w", err) + //} + //if lan == nil { + // return ctrl.Result{Requeue: true}, nil + //} + return ctrl.Result{}, nil } -func (r *IonosCloudMachineReconciler) reconcileDelete(machineService *service.MachineService) (ctrl.Result, error) { +func (r *IonosCloudMachineReconciler) reconcileDelete(machineService *cloud.Service) (ctrl.Result, error) { isLANGone, err := machineService.DeleteLAN("placeholder for LAN ID") if err != nil { return ctrl.Result{}, fmt.Errorf("could not delete LAN: %w", err) @@ -169,7 +233,7 @@ func (r *IonosCloudMachineReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -func (r *IonosCloudMachineReconciler) getInfraCluster( +func (r *IonosCloudMachineReconciler) getClusterScope( ctx context.Context, logger *logr.Logger, cluster *clusterv1.Cluster, ionosCloudMachine *infrav1.IonosCloudMachine, ) (*scope.ClusterScope, error) { var clusterScope *scope.ClusterScope diff --git a/internal/ionoscloud/client/client.go b/internal/ionoscloud/client/client.go index 1e8b8f44..08d735df 100644 --- a/internal/ionoscloud/client/client.go +++ b/internal/ionoscloud/client/client.go @@ -304,11 +304,13 @@ func (c *IonosCloudClient) GetRequests(ctx context.Context, method, path string) if method == "" { return nil, errors.New("method needs to be provided") } - yesterday := time.Now().Add(-24 * time.Hour).Format(time.DateTime) + + lookback := time.Now().Add(-24 * time.Hour).Format(time.DateTime) reqs, _, err := c.API.RequestsApi.RequestsGet(ctx). + Depth(3). FilterMethod(method). FilterUrl(path). - FilterCreatedAfter(yesterday). + FilterCreatedAfter(lookback). 
Execute() if err != nil { return nil, fmt.Errorf("failed to get requests: %w", err) @@ -318,6 +320,7 @@ func (c *IonosCloudClient) GetRequests(ctx context.Context, method, path string) // We invert the value to sort in descending order return -a.Metadata.CreatedDate.Compare(b.Metadata.CreatedDate.Time) }) + return &items, nil } diff --git a/internal/service/datacenter.go b/internal/service/cloud/datacenter.go similarity index 91% rename from internal/service/datacenter.go rename to internal/service/cloud/datacenter.go index 276cc5dd..d986519d 100644 --- a/internal/service/datacenter.go +++ b/internal/service/cloud/datacenter.go @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -package service +package cloud // DataCenterID is a shortcut for getting the data center ID used by the IONOS Cloud machine. -func (s *MachineService) DataCenterID() string { +func (s *Service) DataCenterID() string { return s.scope.IonosCloudMachine.Spec.DatacenterID } diff --git a/internal/service/cloud/network.go b/internal/service/cloud/network.go new file mode 100644 index 00000000..f1efba20 --- /dev/null +++ b/internal/service/cloud/network.go @@ -0,0 +1,226 @@ +/* +Copyright 2024 IONOS Cloud. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cloud + +import ( + "errors" + "fmt" + infrav1 "github.com/ionos-cloud/cluster-api-provider-ionoscloud/api/v1alpha1" + "k8s.io/apimachinery/pkg/util/json" + "k8s.io/utils/ptr" + "net/http" + "path" + "strings" + + sdk "github.com/ionos-cloud/sdk-go/v6" +) + +// LANName returns the name of the cluster LAN. +func (s *Service) LANName() string { + return fmt.Sprintf( + "k8s-lan-%s-%s", + s.scope.ClusterScope.Cluster.Namespace, + s.scope.ClusterScope.Cluster.Name) +} + +func (s *Service) ReconcileLAN() (requeue bool, err error) { + log := s.scope.Logger.WithName("ReconcileLAN") + + // try to retrieve the cluster lan + clusterLan, err := s.GetLAN() + if clusterLan != nil || err != nil { + // If we found the LAN, we don't need to create one. + // TODO(lubedacht) check if patching is required => future task. + return false, err + + } + + // if we didn't find a lan, we check if a lan is already in creation + requestStatus, err := s.checkForPendingLanRequest(http.MethodPost, "") + if err != nil { + return false, fmt.Errorf("unable to list pending lan requests: %w", err) + } + + // We want to requeue and check again after some time + if requestStatus == sdk.RequestStatusRunning || requestStatus == sdk.RequestStatusQueued { + return true, nil + } + + // check again as the request might be done right after we checked + // to prevent duplicate creation + if requestStatus == sdk.RequestStatusDone { + clusterLan, err = s.GetLAN() + if clusterLan != nil || err != nil { + return false, err + } + + // If we still don't get a lan here even though we found a request that is done, + // the lan was probably deleted before. + // Therefore, we will attempt to create the lan again. + // + // TODO(lubedacht) + // Another solution would be to query for a deletion request and check if its created time + // is later than the created time of the lan POST request. + } + + log.V(4).Info("No lan was found. 
Creating new lan") + if err := s.CreateLAN(); err != nil { + return false, err + } + + // after creating the lan, we want to requeue and let the request be finished + return true, nil +} + +// GetLAN tries to retrieve the cluster related lan in the datacenter. +func (s *Service) GetLAN() (*sdk.Lan, error) { + // check if the Lan exists + lans, err := s.API().ListLANs(s.ctx, s.DataCenterID()) + if err != nil { + return nil, fmt.Errorf("could not list lans in datacenter %s: %w", s.DataCenterID(), err) + } + + var foundLan *sdk.Lan + for _, l := range *lans.Items { + if name := l.Properties.Name; name != nil && *l.Properties.Name == s.LANName() { + foundLan = &l + break + } + } + + return foundLan, nil +} + +func (s *Service) CreateLAN() error { + log := s.scope.Logger.WithName("CreateLAN") + + requestPath, err := s.API().CreateLAN(s.ctx, s.DataCenterID(), sdk.LanPropertiesPost{ + Name: ptr.To(s.LANName()), + Public: ptr.To(true), + }) + + if err != nil { + return fmt.Errorf("unable to create lan in datacenter %s: %w", s.DataCenterID(), err) + } + + s.scope.ClusterScope.IonosCluster.Status.PendingRequests[s.DataCenterID()] = &infrav1.ProvisioningRequest{ + Method: http.MethodPost, + RequestPath: requestPath, + State: infrav1.RequestStatusQueued, + } + + err = s.scope.ClusterScope.PatchObject() + if err != nil { + return fmt.Errorf("unable to patch the cluster: %w", err) + } + + log.WithValues("requestPath", requestPath).Info("Successfully requested for LAN creation") + + return nil +} + +// DeleteLAN deletes the lan used by the cluster. The returned bool reports whether the LAN no longer exists. 
+//func (s *Service) DeleteLAN(lanID string) (bool, error) { +// var err error +// log := s.scope.Logger.WithName("DestroyLAN") +// +// // Check for LAN deletion requests +// requestExists, err := s.checkForPendingLanRequest(http.MethodDelete, lanID) +// if err != nil { +// return false, fmt.Errorf("could not check if a LAN request exists: %w", err) +// } +// if requestExists { +// log.Info("the latest deletion request has not finished yet, so let's try again later.") +// return false, nil +// } +// // Search for LAN +// lan, err := s.API().GetLAN(s.ctx, s.DataCenterID(), lanID) +// if err != nil { +// return false, fmt.Errorf("could not check if LAN exists: %w", err) +// } +// if lan != nil && len(*lan.Entities.Nics.Items) > 0 { +// log.Info("the cluster still has more than node. skipping LAN deletion.") +// return false, nil +// } +// if lan == nil { +// log.Info("lan could not be found") +// return true, nil +// } +// // Destroy LAN +// log.Info("requesting deletion of LAN") +// requestPath, err := s.API().DestroyLAN(s.ctx, s.DataCenterID(), lanID) +// if err != nil { +// return false, fmt.Errorf("could not request deletion of LAN: %w", err) +// } +// log.WithValues("requestPath", requestPath).Info("successfully requested lan deletion.") +// return false, nil +//} + +// checkForPendingLanRequest checks if there is a request for the creation, update or deletion of a LAN in the data center. +// For update and deletion requests, it is also necessary to provide the LAN ID (value will be ignored for creation). 
+func (s *Service) checkForPendingLanRequest(method string, lanID string) (status string, err error) { + switch method { + default: + return "", fmt.Errorf("unsupported method %s, allowed methods are %s", method, strings.Join( + []string{http.MethodPost, http.MethodDelete, http.MethodPatch}, + ",", + )) + case http.MethodDelete, http.MethodPatch: + if lanID == "" { + return "", errors.New("lanID cannot be empty for DELETE and PATCH requests") + } + break + case http.MethodPost: + break + } + + lanPath := path.Join("datacenters", s.DataCenterID(), "lan") + requests, err := s.getPendingRequests(method, lanPath) + if err != nil { + return "", err + } + + for _, r := range requests { + if method != http.MethodPost { + id := *(*r.Metadata.RequestStatus.Metadata.Targets)[0].Target.Id + if id != lanID { + continue + } + } else { + var lan sdk.Lan + err = json.Unmarshal([]byte(*r.Properties.Body), &lan) + if err != nil { + return "", fmt.Errorf("could not unmarshal request into LAN: %w", err) + } + if *lan.Properties.Name != s.LANName() { + continue + } + } + + status := *r.Metadata.RequestStatus.Metadata.Status + + if status == sdk.RequestStatusFailed { + // We just log the error but not return it, so we can retry the request. + message := r.Metadata.RequestStatus.Metadata.Message + s.scope.Logger.WithValues("requestID", r.Id, "requestStatus", status). + Error(errors.New(*message), "last request for LAN has failed. logging it for debugging purposes") + } + + return status, nil + } + return "", nil +} diff --git a/internal/service/cloud/request.go b/internal/service/cloud/request.go new file mode 100644 index 00000000..fcb7d868 --- /dev/null +++ b/internal/service/cloud/request.go @@ -0,0 +1,35 @@ +/* +Copyright 2024 IONOS Cloud. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cloud + +import ( + "fmt" + sdk "github.com/ionos-cloud/sdk-go/v6" +) + +func (s *Service) getPendingRequests(method, resourcePath string) ([]sdk.Request, error) { + requests, err := s.API().GetRequests(s.ctx, method, resourcePath) + if err != nil { + return nil, fmt.Errorf("could not get requests: %w", err) + } + + if requests == nil { + return nil, nil + } + + return *requests, nil +} diff --git a/internal/service/service.go b/internal/service/cloud/service.go similarity index 64% rename from internal/service/service.go rename to internal/service/cloud/service.go index 44ae0e7e..c72a43a5 100644 --- a/internal/service/service.go +++ b/internal/service/cloud/service.go @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package service offers infra resources services for IONOS Cloud machine reconciliation. -package service +// Package cloud offers infra resources services for IONOS Cloud machine reconciliation. +package cloud import ( "context" @@ -25,24 +25,24 @@ import ( "github.com/ionos-cloud/cluster-api-provider-ionoscloud/scope" ) -// MachineService offers infra resources services for IONOS Cloud machine reconciliation. -type MachineService struct { +// Service offers infra resources services for IONOS Cloud machine reconciliation. +type Service struct { scope *scope.MachineScope ctx context.Context } -// NewMachineService returns a new MachineService. 
-func NewMachineService(ctx context.Context, s *scope.MachineScope) (*MachineService, error) { +// NewService returns a new Service. +func NewService(ctx context.Context, s *scope.MachineScope) (*Service, error) { if s == nil { - return nil, errors.New("machine service cannot use a nil machine scope") + return nil, errors.New("cloud service cannot use a nil machine scope") } - return &MachineService{ + return &Service{ scope: s, ctx: ctx, }, nil } // API is a shortcut for the IONOS Cloud Client. -func (s *MachineService) API() ionoscloud.Client { +func (s *Service) API() ionoscloud.Client { return s.scope.ClusterScope.IonosClient } diff --git a/internal/service/network.go b/internal/service/network.go deleted file mode 100644 index 121bc9f5..00000000 --- a/internal/service/network.go +++ /dev/null @@ -1,167 +0,0 @@ -/* -Copyright 2024 IONOS Cloud. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package service - -import ( - "encoding/json" - "errors" - "fmt" - "net/http" - "net/url" - "strings" - - sdk "github.com/ionos-cloud/sdk-go/v6" - "k8s.io/utils/pointer" -) - -// LANName returns the name of the cluster LAN. -func (s *MachineService) LANName() string { - return fmt.Sprintf( - "k8s-lan-%s-%s", - s.scope.ClusterScope.Cluster.Namespace, - s.scope.ClusterScope.Cluster.Name) -} - -// GetLAN ensures a LAN is created and returns it if available. 
-func (s *MachineService) GetLAN() (*sdk.Lan, error) { - var err error - log := s.scope.Logger.WithName("GetLAN") - - // Check for LAN creation requests - requestExists, err := s.lanRequestExists(http.MethodPost, "") - if err != nil { - return nil, fmt.Errorf("could not check if a LAN request exists") - } - if requestExists { - return nil, nil - } - // Search for LAN - lans, err := s.API().ListLANs(s.ctx, s.DataCenterID()) - if err != nil { - return nil, fmt.Errorf("could not list LANs in data center") - } - for _, l := range *lans.Items { - if name := l.Properties.Name; name != nil && *name == s.LANName() { - return &l, nil - } - } - // Create LAN - log.Info("no LAN was found. requesting creation of a new one") - requestPath, err := s.API().CreateLAN(s.ctx, s.DataCenterID(), sdk.LanPropertiesPost{ - Name: pointer.String(s.LANName()), - Public: pointer.Bool(true), - }) - if err != nil { - return nil, fmt.Errorf("could not request LAN creation: %w", err) - } - log.WithValues("requestPath", requestPath).Info("Successfully requested for LAN creation") - return nil, nil -} - -// DeleteLAN deletes the lan used by the cluster. A bool indicates if the LAN still exists. -func (s *MachineService) DeleteLAN(lanID string) (bool, error) { - var err error - log := s.scope.Logger.WithName("DestroyLAN") - - // Check for LAN deletion requests - requestExists, err := s.lanRequestExists(http.MethodDelete, lanID) - if err != nil { - return false, fmt.Errorf("could not check if a LAN request exists: %w", err) - } - if requestExists { - log.Info("the latest deletion request has not finished yet, so let's try again later.") - return false, nil - } - // Search for LAN - lan, err := s.API().GetLAN(s.ctx, s.DataCenterID(), lanID) - if err != nil { - return false, fmt.Errorf("could not check if LAN exists: %w", err) - } - if lan != nil && len(*lan.Entities.Nics.Items) > 0 { - log.Info("the cluster still has more than node. 
skipping LAN deletion.") - return false, nil - } - if lan == nil { - log.Info("lan could not be found") - return true, nil - } - // Destroy LAN - log.Info("requesting deletion of LAN") - requestPath, err := s.API().DestroyLAN(s.ctx, s.DataCenterID(), lanID) - if err != nil { - return false, fmt.Errorf("could not request deletion of LAN: %w", err) - } - log.WithValues("requestPath", requestPath).Info("successfully requested lan deletion.") - return false, nil -} - -// lanRequestExists checks if there is a request for the creation or deletion of a LAN in the data center. -// For deletion requests, it is also necessary to provide the LAN ID (value will be ignored for creation). -func (s *MachineService) lanRequestExists(method string, lanID string) (bool, error) { - if method != http.MethodPost && method != http.MethodDelete { - return false, fmt.Errorf("invalid method %s (only POST and DELETE are valid)", method) - } - if method == http.MethodDelete && lanID == "" { - return false, fmt.Errorf("when method is DELETE, lanID cannot be empty") - } - - lanPath, err := url.JoinPath("datacenter", s.scope.IonosCloudMachine.Spec.DatacenterID, "lan") - if err != nil { - return false, fmt.Errorf("could not generate datacenter/{dataCenterID}/lan path: %w", err) - } - requests, err := s.API().GetRequests(s.ctx, method, lanPath) - if err != nil { - return false, fmt.Errorf("could not get requests: %w", err) - } - for _, r := range *requests { - if method == "POST" { - var lan sdk.Lan - err = json.Unmarshal([]byte(*r.Properties.Body), &lan) - if err != nil { - return false, fmt.Errorf("could not unmarshal request into LAN: %w", err) - } - if *lan.Properties.Name != s.LANName() { - continue - } - } else if method == "DELETE" { - u, err := url.Parse(*r.Properties.Url) - if err != nil { - return false, fmt.Errorf("could not format url: %w", err) - } - lanIDPath, err := url.JoinPath(lanPath, lanID) - if err != nil { - return false, fmt.Errorf("could not generate lanPath for lan 
resource: %w", err) - } - - if !strings.HasSuffix(u.Path, lanIDPath) { - continue - } - } - status := *r.Metadata.RequestStatus.Metadata.Status - if status == sdk.RequestStatusFailed { - message := r.Metadata.RequestStatus.Metadata.Message - s.scope.Logger.WithValues("requestID", r.Id, "requestStatus", status). - Error(errors.New(*message), "last request for LAN has failed. logging it for debugging purposes") - // We just log the error but not return it, so we can retry the request. - return false, nil - } - if status == sdk.RequestStatusQueued || status == sdk.RequestStatusRunning { - return true, nil - } - } - return false, nil -} diff --git a/scope/cluster.go b/scope/cluster.go index c4b035c4..355289ac 100644 --- a/scope/cluster.go +++ b/scope/cluster.go @@ -98,9 +98,9 @@ func NewClusterScope(params ClusterScopeParams) (*ClusterScope, error) { return clusterScope, nil } -// patchObject will apply all changes from the IonosCloudCluster. +// PatchObject will apply all changes from the IonosCloudCluster. // It will also make sure to patch the status subresource. 
-func (c *ClusterScope) patchObject() error { +func (c *ClusterScope) PatchObject() error { // always set the ready condition conditions.SetSummary(c.IonosCluster, conditions.WithConditions(infrav1.IonosCloudClusterReady)) @@ -125,5 +125,5 @@ func (c *ClusterScope) Finalize() error { return retry.OnError( retry.DefaultBackoff, shouldRetry, - c.patchObject) + c.PatchObject) } diff --git a/scope/machine.go b/scope/machine.go index 4f42bdc9..dc84fe5c 100644 --- a/scope/machine.go +++ b/scope/machine.go @@ -20,6 +20,8 @@ import ( "context" "errors" "fmt" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/cluster-api/util/conditions" "github.com/go-logr/logr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -88,3 +90,33 @@ func NewMachineScope(params MachineScopeParams) (*MachineScope, error) { IonosCloudMachine: params.IonosCloudMachine, }, nil } + +func (m *MachineScope) HasFailed() bool { + status := m.IonosCloudMachine.Status + return status.FailureReason != nil || status.FailureMessage != nil +} + +func (m *MachineScope) PatchObject() error { + conditions.SetSummary(m.IonosCloudMachine, + conditions.WithConditions( + infrav1.MachineProvisionedCondition)) + + return m.patchHelper.Patch( + context.TODO(), + m.IonosCloudMachine, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.ReadyCondition, + infrav1.MachineProvisionedCondition, + }}) +} + +func (m *MachineScope) Finalize() error { + // NOTE(lubedacht) retry is only a way to reduce the failure chance, + // but in general, the reconciliation logic must be resilient + // to handle an outdated resource from the API server. + shouldRetry := func(error) bool { return true } + return retry.OnError( + retry.DefaultBackoff, + shouldRetry, + m.PatchObject) +}