Skip to content

Commit

Permalink
fix: wipe and reboot with talos client on removing server
Browse files Browse the repository at this point in the history
Ref: siderolabs#1294 siderolabs#1285
All deleted machines will receive a reset call from the Talos client.

Signed-off-by: Anthony ARNAUD <[email protected]>
  • Loading branch information
aarnaud committed Feb 8, 2024
1 parent ea8e126 commit 106e3e1
Showing 1 changed file with 50 additions and 0 deletions.
50 changes: 50 additions & 0 deletions app/caps-controller-manager/controllers/metalmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ import (
"context"
"errors"
"fmt"
"github.com/siderolabs/sidero/app/sidero-controller-manager/api/v1alpha1"

"github.com/go-logr/logr"
"github.com/siderolabs/go-pointer"
talosclient "github.com/siderolabs/talos/pkg/machinery/client"
clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/fields"
Expand Down Expand Up @@ -253,6 +256,9 @@ func (r *MetalMachineReconciler) reconcileDelete(ctx context.Context, metalMachi

err := r.Get(ctx, types.NamespacedName{Namespace: metalMachine.Spec.ServerRef.Namespace, Name: metalMachine.Spec.ServerRef.Name}, &serverBinding)
if err == nil {
if err = r.ResetServer(ctx, metalMachine, &serverBinding); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, r.Delete(ctx, &serverBinding)
}

Expand Down Expand Up @@ -467,3 +473,47 @@ func (r *MetalMachineReconciler) fetchServerClass(ctx context.Context, classRef

return serverClassResource, nil
}

// ResetServer asks Talos to wipe and reboot the machine behind serverBinding.
//
// The Talos client is built from the "talosconfig" key of the
// "<cluster-name>-talosconfig" secret in the cluster's namespace and is pointed
// at the addresses recorded in serverBinding.Spec.Addresses.
//
// An error is returned only when the client cannot be set up: the owning
// cluster cannot be resolved, the secret cannot be read or lacks the
// "talosconfig" key, the config does not parse, or the client cannot be
// constructed. The outcome of the reset call itself is reported as an event on
// metalMachine.Spec.ServerRef and is never returned, because the machine may
// already be reset, rebooting or offline.
func (r *MetalMachineReconciler) ResetServer(ctx context.Context, metalMachine *infrav1.MetalMachine, serverBinding *infrav1.ServerBinding) error {
	cluster, err := util.GetClusterFromMetadata(ctx, r.Client, metalMachine.ObjectMeta)
	if err != nil {
		// Keep the original message but preserve the cause for callers and logs.
		return fmt.Errorf("no cluster label or cluster does not exist: %w", err)
	}

	var talosSecret corev1.Secret

	secretKey := types.NamespacedName{
		Namespace: cluster.Namespace,
		Name:      fmt.Sprintf("%s-talosconfig", cluster.Name),
	}

	if err = r.Get(ctx, secretKey, &talosSecret); err != nil {
		return fmt.Errorf("fetching talosconfig secret %q: %w", secretKey.String(), err)
	}

	config, ok := talosSecret.Data["talosconfig"]
	if !ok {
		return fmt.Errorf("failed to find talosconfig data in the talosconfig secret")
	}

	clientConfig, err := clientconfig.FromBytes(config)
	if err != nil {
		return fmt.Errorf("parsing talosconfig from secret %q: %w", secretKey.String(), err)
	}

	// Without an explicit address the endpoint override would be empty.
	// NOTE(review): the client then presumably falls back to the endpoints
	// stored in the talosconfig, which could send this destructive reset to a
	// different machine — confirm against the Talos client. Skip instead.
	if len(serverBinding.Spec.Addresses) == 0 {
		r.Recorder.Event(metalMachine.Spec.ServerRef, corev1.EventTypeWarning, "Server Allocation", fmt.Sprintf("Software reset skipped on %q, server binding has no addresses", metalMachine.Name))

		return nil
	}

	talosClient, err := talosclient.New(ctx,
		talosclient.WithConfig(clientConfig),
		talosclient.WithEndpoints(serverBinding.Spec.Addresses...),
	)
	if err != nil {
		return fmt.Errorf("creating talos client for %q: %w", metalMachine.Name, err)
	}

	// Release the client once the call is done; this runs on every delete
	// reconcile, so a leaked client would accumulate.
	defer talosClient.Close() //nolint:errcheck

	// Ignore the error if the machine is already reset, rebooting or offline,
	// but record an event either way.
	// NOTE(review): the two flags are believed to be graceful=false,
	// reboot=true — confirm against the Talos client's Reset signature.
	if err = talosClient.Reset(ctx, false, true); err != nil {
		r.Recorder.Event(metalMachine.Spec.ServerRef, corev1.EventTypeWarning, "Server Allocation", fmt.Sprintf("Software reset failed on %q, %s", metalMachine.Name, err))

		return nil
	}

	r.Recorder.Event(metalMachine.Spec.ServerRef, corev1.EventTypeNormal, "Server Allocation", fmt.Sprintf("Software reset called on %q", metalMachine.Name))

	return nil
}

0 comments on commit 106e3e1

Please sign in to comment.