Skip to content

Commit d3b0ed0

Browse files
committed
[ws-daemon] Make sure to cleanup all workspace state for rejected pods
1 parent ac1c86b commit d3b0ed0

File tree

3 files changed

+52
-0
lines changed

3 files changed

+52
-0
lines changed

components/ws-daemon/pkg/controller/workspace_controller.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,25 @@ func (wsc *WorkspaceController) handleWorkspaceStop(ctx context.Context, ws *wor
227227
span, ctx := opentracing.StartSpanFromContext(ctx, "handleWorkspaceStop")
228228
defer tracing.FinishSpan(span, &err)
229229

230+
if ws.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected) {
231+
// in this case we are not interested in any backups, but instead are concerned with completely wiping all state that might be dangling somewhere
232+
log.Info("handling workspace stop - wiping mode")
233+
err = wsc.operations.WipeWorkspace(ctx, ws.Name)
234+
if err != nil {
235+
wsc.emitEvent(ws, "Wiping", fmt.Errorf("failed to wipe workspace: %w", err))
236+
return ctrl.Result{}, fmt.Errorf("failed to wipe workspace: %w", err)
237+
}
238+
239+
return ctrl.Result{}, nil
240+
}
241+
242+
// regular case
243+
return wsc.doWorkspaceContentBackup(ctx, span, ws, req)
244+
}
245+
246+
func (wsc *WorkspaceController) doWorkspaceContentBackup(ctx context.Context, span opentracing.Span, ws *workspacev1.Workspace, req ctrl.Request) (result ctrl.Result, err error) {
247+
log := log.FromContext(ctx)
248+
230249
if c := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady)); c == nil || c.Status == metav1.ConditionFalse {
231250
return ctrl.Result{}, fmt.Errorf("workspace content was never ready")
232251
}

components/ws-daemon/pkg/controller/workspace_operations.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ type WorkspaceOperations interface {
6666
BackupWorkspace(ctx context.Context, opts BackupOptions) (*csapi.GitStatus, error)
6767
// DeleteWorkspace deletes the content of the workspace from disk
6868
DeleteWorkspace(ctx context.Context, instanceID string) error
69+
// WipeWorkspace deletes all references to the workspace. Does not fail if parts are already gone, or state is inconsistent.
70+
WipeWorkspace(ctx context.Context, instanceID string) error
6971
// SnapshotIDs generates the name and url for a snapshot
7072
SnapshotIDs(ctx context.Context, instanceID string) (snapshotUrl, snapshotName string, err error)
7173
// Snapshot takes a snapshot of the workspace
@@ -285,6 +287,36 @@ func (wso *DefaultWorkspaceOperations) DeleteWorkspace(ctx context.Context, inst
285287
return nil
286288
}
287289

290+
// WipeWorkspace removes everything this daemon holds for the workspace instance
// identified by instanceID: it disposes the session, deletes the session's
// ServiceLocDaemon and ServiceLocNode directories, and finally removes the
// instance from the provider.
//
// It returns nil when the session cannot be obtained. Otherwise it returns the
// first error reported by Dispose or os.RemoveAll, and the remaining steps are
// skipped in that case.
func (wso *DefaultWorkspaceOperations) WipeWorkspace(ctx context.Context, instanceID string) error {
291+
ws, err := wso.provider.GetAndConnect(ctx, instanceID)
292+
if err != nil {
293+
// we have to assume everything is fine, and this workspace has already been completely wiped
// NOTE(review): every GetAndConnect error is treated as "already wiped" and is dropped without being logged — confirm this is intended.
294+
return nil
295+
}
296+
297+
// dispose the session, passing the provider hooks stored under the session.WorkspaceDisposed key
// NOTE(review): a Dispose failure returns early, so the directories below and the provider entry are left in place.
if err = ws.Dispose(ctx, wso.provider.hooks[session.WorkspaceDisposed]); err != nil {
298+
glog.WithError(err).WithFields(ws.OWI()).Error("cannot dispose session")
299+
return err
300+
}
301+
302+
// remove workspace daemon directory in the node
// (os.RemoveAll returns nil when the path does not exist, so an already-missing directory is not an error)
303+
if err := os.RemoveAll(ws.ServiceLocDaemon); err != nil {
304+
glog.WithError(err).WithFields(ws.OWI()).Error("cannot delete workspace daemon directory")
305+
return err
306+
}
307+
308+
// remove workspace daemon node directory in the node
309+
// TODO(gpl): Is this used at all? Can't find any reference
310+
if err := os.RemoveAll(ws.ServiceLocNode); err != nil {
311+
glog.WithError(err).WithFields(ws.OWI()).Error("cannot delete workspace daemon node directory")
312+
return err
313+
}
314+
315+
// drop the instance from the provider; any result of Remove is not checked here
wso.provider.Remove(ctx, instanceID)
316+
317+
return nil
318+
}
319+
288320
func (wso *DefaultWorkspaceOperations) SnapshotIDs(ctx context.Context, instanceID string) (snapshotUrl, snapshotName string, err error) {
289321
sess, err := wso.provider.GetAndConnect(ctx, instanceID)
290322
if err != nil {

components/ws-manager-mk2/controllers/workspace_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
224224
workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(fmt.Sprintf("Pod reached maximum recreations %d, failing", workspace.Status.PodRecreated), metav1.ConditionFalse))
225225
return ctrl.Result{Requeue: true}, nil // requeue so we end up in the "Stopped" case below
226226
}
227+
log.WithValues("PodStarts", workspace.Status.PodStarts, "PodRecreated", workspace.Status.PodRecreated, "Phase", workspace.Status.Phase).Info("trigger pod recreation")
227228

228229
// Must persist the modification of pod starts, and ensure we retry on conflict.
229230
// If we fail to persist this value, it's possible that the Pod gets recreated endlessly

0 commit comments

Comments
 (0)