From 52a7610e867068147fac4cca4eb293f0e39f92a8 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Fri, 7 Jul 2023 18:16:59 -0700 Subject: [PATCH] traverser: prevent allocation of currently allocated resources Problem: issue https://github.com/flux-framework/flux-sched/issues/1043 identified a scenario where Fluxion will grant a new allocation to a job while the resources are still occupied by the previous allocation. The double booking occurs due to the assumption Fluxion makes that a job will not run beyond its walltime. However, as the issue describes, an epilog script may cause a job to run beyond its walltime. Since Fluxion doesn't receive a `free` message until the epilog completes, the allocation remains in the resource graph even after its scheduled completion time has passed, so the planner considers the resources available and they can be allocated to another job. There are other common scenarios that can lead to multiple concurrent allocations, such as a job getting stuck in CLEANUP. Add a check for an existing allocation on each exclusive resource vertex for allocation traversals during graph traversal pruning. This prevents another job from receiving the resources while still allowing reservations and satisfiability checks to complete. --- resource/traversers/dfu_impl.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/resource/traversers/dfu_impl.cpp b/resource/traversers/dfu_impl.cpp index 39f8e7f0d..009bcd066 100644 --- a/resource/traversers/dfu_impl.cpp +++ b/resource/traversers/dfu_impl.cpp @@ -124,6 +124,17 @@ int dfu_impl_t::by_excl (const jobmeta_t &meta, const std::string &s, vtx_t u, // requested, we check the validity of the visiting vertex using // its x_checker planner. if (exclusive_in || resource.exclusive == Jobspec::tristate_t::TRUE) { + // If it's exclusive, the traversal type is an allocation, and + // there are no other allocations on the vertex, then proceed.
This + // check prevents the observed multiple booking issue, where + // resources with jobs running beyond their walltime can be + // allocated to another job since the planner considers them + // available. Note: if Fluxion needs to support shared + // resources at the leaf level this check will not catch + // multiple booking. + if (meta.alloc_type == jobmeta_t::alloc_type_t::AT_ALLOC && + !(*m_graph)[u].schedule.allocations.empty ()) + goto done; errno = 0; p = (*m_graph)[u].idata.x_checker; njobs = planner_avail_resources_during (p, at, duration);