Skip to content

Commit

Permalink
fix: detached volume replicas not evicted
Browse files Browse the repository at this point in the history
longhorn/longhorn-9781
longhorn/longhorn-9857

Signed-off-by: Chin-Ya Huang <[email protected]>
(cherry picked from commit bb0a41d)
  • Loading branch information
c3y1huang committed Dec 4, 2024
1 parent 7bb340c commit 15bbeb5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 5 deletions.
38 changes: 33 additions & 5 deletions controller/volume_eviction_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (

"github.com/longhorn/longhorn-manager/constant"
"github.com/longhorn/longhorn-manager/datastore"
"github.com/longhorn/longhorn-manager/scheduler"
"github.com/longhorn/longhorn-manager/util"

longhorn "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2"
)
Expand All @@ -38,6 +40,7 @@ type VolumeEvictionController struct {

ds *datastore.DataStore
cacheSyncs []cache.InformerSynced
scheduler *scheduler.ReplicaScheduler
}

func NewVolumeEvictionController(
Expand All @@ -63,6 +66,8 @@ func NewVolumeEvictionController(
eventRecorder: eventBroadcaster.NewRecorder(scheme, corev1.EventSource{Component: "longhorn-volume-eviction-controller"}),
}

vec.scheduler = scheduler.NewReplicaScheduler(ds)

var err error
if _, err = ds.VolumeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: vec.enqueueVolume,
Expand Down Expand Up @@ -167,10 +172,6 @@ func (vec *VolumeEvictionController) reconcile(volName string) (err error) {
return nil
}

if vol.Spec.NodeID == "" {
return nil
}

if !vec.isResponsibleFor(vol) {
return nil
}
Expand Down Expand Up @@ -205,14 +206,41 @@ func (vec *VolumeEvictionController) reconcile(volName string) (err error) {
evictingAttachmentTicketID := longhorn.GetAttachmentTicketID(longhorn.AttacherTypeVolumeEvictionController, volName)

if hasReplicaEvictionRequested(replicas) {
createOrUpdateAttachmentTicket(va, evictingAttachmentTicketID, vol.Status.OwnerID, longhorn.AnyValue, longhorn.AttacherTypeVolumeEvictionController)
if vec.hasDiskCandidateForReplicaEviction(replicas, vol) {
createOrUpdateAttachmentTicket(va, evictingAttachmentTicketID, vol.Status.OwnerID, longhorn.AnyValue, longhorn.AttacherTypeVolumeEvictionController)
}
} else {
delete(va.Spec.AttachmentTickets, evictingAttachmentTicketID)
}

return nil
}

// hasDiskCandidateForReplicaEviction reports whether the replica scheduler
// returns at least one disk candidate for every replica in replicas.
//
// Each replica is probed through a deep copy whose HardNodeAffinity has been
// cleared, so the replica objects held in the map are not modified by this
// check. It returns false as soon as a lookup fails or yields no candidates,
// logging the reason, and returns true only when every replica had a
// candidate (which includes the case of an empty map).
func (vec *VolumeEvictionController) hasDiskCandidateForReplicaEviction(replicas map[string]*longhorn.Replica, volume *longhorn.Volume) bool {
	for _, r := range replicas {
		// Probe with a copy that carries no hard node affinity.
		// NOTE(review): presumably so the lookup is not pinned to the
		// replica's current node — confirm against the scheduler.
		probe := r.DeepCopy()
		probe.Spec.HardNodeAffinity = ""

		candidates, scheduleErrs, err := vec.scheduler.FindDiskCandidates(probe, replicas, volume)
		if err != nil {
			vec.logger.WithError(err).Warnf("Failed to find disk candidates for evicting replica %q", r.Name)
			return false
		}

		if len(candidates) > 0 {
			continue
		}

		// No candidate: report the eviction failure together with whatever
		// reasons the scheduler handed back.
		reasons := util.NewMultiError(longhorn.ErrorReplicaScheduleEvictReplicaFailed)
		if scheduleErrs != nil {
			reasons.Append(scheduleErrs)
		}
		vec.logger.Warnf("No disk candidates for evicting replica %q: %v", r.Name, reasons.Join())
		return false
	}

	vec.logger.Infof("Found disk candidates for evicting replicas of volume %q", volume.Name)
	return true
}

// isResponsibleFor reports whether this controller instance should handle
// vol, i.e. whether the volume's Status.OwnerID equals the controller's ID.
func (vec *VolumeEvictionController) isResponsibleFor(vol *longhorn.Volume) bool {
	owner := vol.Status.OwnerID
	return owner == vec.controllerID
}
1 change: 1 addition & 0 deletions k8s/pkg/apis/longhorn/v1beta2/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ const (
ErrorReplicaScheduleEngineImageNotReady = "none of the node candidates contains a ready engine image"
ErrorReplicaScheduleHardNodeAffinityNotSatisfied = "hard affinity cannot be satisfied"
ErrorReplicaScheduleSchedulingFailed = "replica scheduling failed"
ErrorReplicaScheduleEvictReplicaFailed = "evict replica failed"
)

type DiskType string
Expand Down

0 comments on commit 15bbeb5

Please sign in to comment.