From 050174391c83b136d112a450b50055021d18d442 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Wed, 16 Oct 2024 21:44:38 -0700 Subject: [PATCH] qmanager: add cleanup cancel upon receipt of final .free RPC Problem: the final .free RPC does not free brokerless resources (e.g., rack-local ssds) with the current implementation of partial cancel. Add a full, cleanup cancel upon receipt of the final .free RPC. While exhibiting slighlty lower performance for a sequence of `.free` RPCs than supporting brokerless resource release in partial cancel, the full cancel is not subject to errors under various pruning filter configurations. Handling and preventing the edge-case errors will introduce significant complexity into the traverser and planner, and require updates to REAPI. We can revisit that implementation in the future if required by performance needs. --- qmanager/policies/base/queue_policy_base.hpp | 48 +++++++++++--------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/qmanager/policies/base/queue_policy_base.hpp b/qmanager/policies/base/queue_policy_base.hpp index 6fa2e44df..d961aa84b 100644 --- a/qmanager/policies/base/queue_policy_base.hpp +++ b/qmanager/policies/base/queue_policy_base.hpp @@ -646,14 +646,28 @@ class queue_policy_base_t : public resource_model::queue_adapter_base_t { case job_state_kind_t::ALLOC_RUNNING: // deliberately fall through case job_state_kind_t::RUNNING: - if (cancel (h, job_it->second->id, R, true, full_removal) != 0) { - flux_log_error (flux_h, - "%s: .free RPC partial cancel failed for jobid " - "%jd", - __FUNCTION__, - static_cast (id)); - errno = EINVAL; - goto out; + if (!final) { + if (cancel (h, job_it->second->id, R, true, full_removal) != 0) { + flux_log_error (flux_h, + "%s: .free RPC partial cancel failed for jobid " + "%jd", + __FUNCTION__, + static_cast (id)); + errno = EINVAL; + goto out; + } + } else { + // Run a full cancel to clean up all remaining allocated resources + if (cancel (h, job_it->second->id, true) != 0) { + flux_log_error (flux_h, + "%s: .free RPC full cancel failed for jobid " + "%jd", + __FUNCTION__, + static_cast (id)); + errno = EPROTO; + goto out; + } + full_removal = true; } // We still want to run the sched loop even if there's an inconsistent state set_schedulability (true); @@ -666,24 +680,16 @@ class queue_policy_base_t : public resource_model::queue_adapter_base_t { // during cancel auto job_sp = job_it->second; m_jobs.erase (job_it); - if (final && !full_removal) { - // This error condition indicates a discrepancy between core and sched. + if (full_removal && !final) { + // This error condition can indicate a discrepancy between core and sched, + // but commonly indicates partial cancel didn't clean up resources external + // to a broker rank (e.g., ssds). flux_log_error (flux_h, - "%s: Final .free RPC failed to remove all resources for " + "%s: removed all resources before final .free RPC for " "jobid " "%jd", __FUNCTION__, static_cast (id)); - // Run a full cancel to clean up all remaining allocated resources - if (cancel (h, job_sp->id, true) != 0) { - flux_log_error (flux_h, - "%s: .free RPC full cancel failed for jobid " - "%jd", - __FUNCTION__, - static_cast (id)); - } - errno = EPROTO; - goto out; } } break;