Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: detached volume replicas not evicted #3293

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions controller/volume_eviction_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (

"github.com/longhorn/longhorn-manager/constant"
"github.com/longhorn/longhorn-manager/datastore"
"github.com/longhorn/longhorn-manager/scheduler"
"github.com/longhorn/longhorn-manager/util"

longhorn "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2"
)
Expand All @@ -38,6 +40,7 @@ type VolumeEvictionController struct {

ds *datastore.DataStore
cacheSyncs []cache.InformerSynced
scheduler *scheduler.ReplicaScheduler
}

func NewVolumeEvictionController(
Expand All @@ -63,6 +66,8 @@ func NewVolumeEvictionController(
eventRecorder: eventBroadcaster.NewRecorder(scheme, corev1.EventSource{Component: "longhorn-volume-eviction-controller"}),
}

vec.scheduler = scheduler.NewReplicaScheduler(ds)

var err error
if _, err = ds.VolumeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: vec.enqueueVolume,
Expand Down Expand Up @@ -167,10 +172,6 @@ func (vec *VolumeEvictionController) reconcile(volName string) (err error) {
return nil
}

if vol.Spec.NodeID == "" {
return nil
}

if !vec.isResponsibleFor(vol) {
return nil
}
Expand Down Expand Up @@ -205,14 +206,41 @@ func (vec *VolumeEvictionController) reconcile(volName string) (err error) {
evictingAttachmentTicketID := longhorn.GetAttachmentTicketID(longhorn.AttacherTypeVolumeEvictionController, volName)

if hasReplicaEvictionRequested(replicas) {
createOrUpdateAttachmentTicket(va, evictingAttachmentTicketID, vol.Status.OwnerID, longhorn.AnyValue, longhorn.AttacherTypeVolumeEvictionController)
if vec.hasDiskCandidateForReplicaEviction(replicas, vol) {
createOrUpdateAttachmentTicket(va, evictingAttachmentTicketID, vol.Status.OwnerID, longhorn.AnyValue, longhorn.AttacherTypeVolumeEvictionController)
}
} else {
delete(va.Spec.AttachmentTickets, evictingAttachmentTicketID)
}

return nil
}

// hasDiskCandidateForReplicaEviction reports whether the replica scheduler
// returns at least one disk candidate for every replica in replicas.
//
// It returns false as soon as one replica either fails the candidate lookup
// or yields no candidates, logging a warning that names that replica. When
// every replica passes (including the case of an empty map) it logs an info
// message for the volume and returns true.
func (vec *VolumeEvictionController) hasDiskCandidateForReplicaEviction(replicas map[string]*longhorn.Replica, volume *longhorn.Volume) bool {
	for _, r := range replicas {
		// Query with a copy so the caller's replica object is left untouched.
		// NOTE(review): hard node affinity is cleared presumably so the lookup
		// is not pinned to the replica's current node — confirm against the
		// scheduler.
		probe := r.DeepCopy()
		probe.Spec.HardNodeAffinity = ""

		candidates, scheduleErrs, err := vec.scheduler.FindDiskCandidates(probe, replicas, volume)
		switch {
		case err != nil:
			vec.logger.WithError(err).Warnf("Failed to find disk candidates for evicting replica %q", r.Name)
			return false
		case len(candidates) == 0:
			// Prefix the scheduler's own reasons (if any) with the generic
			// eviction failure so the log line carries both.
			reasons := util.NewMultiError(longhorn.ErrorReplicaScheduleEvictReplicaFailed)
			if scheduleErrs != nil {
				reasons.Append(scheduleErrs)
			}
			vec.logger.Warnf("No disk candidates for evicting replica %q: %v", r.Name, reasons.Join())
			return false
		}
	}

	vec.logger.Infof("Found disk candidates for evicting replicas of volume %q", volume.Name)
	return true
}

// isResponsibleFor reports whether the volume's recorded owner
// (vol.Status.OwnerID) is this controller instance (vec.controllerID).
func (vec *VolumeEvictionController) isResponsibleFor(vol *longhorn.Volume) bool {
	ownerID := vol.Status.OwnerID
	return ownerID == vec.controllerID
}
1 change: 1 addition & 0 deletions k8s/pkg/apis/longhorn/v1beta2/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (
ErrorReplicaScheduleHardNodeAffinityNotSatisfied = "hard affinity cannot be satisfied"
ErrorReplicaScheduleSchedulingFailed = "replica scheduling failed"
ErrorReplicaSchedulePrecheckNewReplicaFailed = "precheck new replica failed"
ErrorReplicaScheduleEvictReplicaFailed = "evict replica failed"
)

type DiskType string
Expand Down
Loading