Skip to content

Commit

Permalink
testsuite: add flux-core issue test for housekeeping
Browse files Browse the repository at this point in the history
Problem: there is no test for flux-core issue
flux-framework/flux-core#6179.

Add tests in the Fluxion issues directory.
  • Loading branch information
milroy committed Aug 10, 2024
1 parent 28adbeb commit d46faea
Showing 1 changed file with 125 additions and 0 deletions.
125 changes: 125 additions & 0 deletions t/issues/t6179-flux-core-housekeeping.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/bin/bash
#
# Ensure fluxion handles final sched.free RPCs (complete and partial R)
# under the easy and fcfs queue policies (flux-core issue #6179)
#

# Print a progress message to stderr, prefixed with the issue tag.
# All arguments are joined with single spaces and printed verbatim: the
# message is passed to printf as data (%s) instead of being spliced into
# the format string, so a '%' or '\' in a message is never interpreted as
# a conversion or escape, and multi-word calls are not truncated.
log() { printf 'flux-coreissue#6179: %s\n' "$*" >&2; }

# Generate free.py: a helper that sends a 'sched.free' request carrying the
# job's complete R.
#   argv[1]  job ID in any form accepted by `flux job id`
# Steps performed by the generated script:
#   1. convert argv[1] to an integer job ID via `flux job id`
#   2. fetch the job's R with `flux job info <id> R` and parse it as JSON
#   3. build the payload {'id': <int>, 'R': <R object>, 'final': True}
#   4. pretty-print the payload, then send it as a 'sched.free' RPC
# The RPC result is not waited on; the script then exits 0.
# The heredoc delimiter is quoted ('EOF'), so the shell performs no
# expansion on the Python text.
cat <<'EOF' >free.py
import flux
import json
import sys
import subprocess as sp
from pprint import pprint
jobid = int(sp.check_output(['flux', 'job', 'id', sys.argv[1]]).decode())
r_obj=json.loads(sp.check_output(['flux', 'job', 'info', sys.argv[1], 'R']).decode())
obj = {'id': jobid, 'R': r_obj, 'final': True}
pprint(obj)
flux.Flux().rpc('sched.free', obj)
sys.exit(0)
EOF

# Generate incomplete-free.py: same flow as free.py, except that R is not
# read from the job. Instead a hard-coded R naming only rank 1 / node1
# (cores 0-15, gpus 0-3) is sent. The jobs in this test are submitted with
# -N2 against a two-rank resource set, so this R covers only part of the
# allocation while the payload is still marked 'final': True.
#   argv[1]  job ID in any form accepted by `flux job id`
# NOTE(review): presumably this reproduces a final free that omits some of
# the job's resources -- confirm against flux-core issue #6179.
cat <<'EOF' >incomplete-free.py
import flux
import json
import sys
import subprocess as sp
from pprint import pprint
jobid = int(sp.check_output(['flux', 'job', 'id', sys.argv[1]]).decode())
R_str = '{"version": 1, "execution": {"R_lite": [{"rank": "1", "children": {"core": "0-15", "gpu": "0-3"}}], "nodelist": ["node1"]}}'
r_obj = json.loads(R_str)
obj = {'id': jobid, 'R': r_obj, 'final': True}
pprint(obj)
flux.Flux().rpc('sched.free', obj)
sys.exit(0)
EOF

# Resource set used for the test: ranks 0-1 (node0, node1), each with
# cores 0-15 and gpus 0-3. Written to the file "R" in the current directory.
cat <<'EOF' >R
{"version": 1, "execution": {"R_lite": [{"rank": "0-1", "children": {"core": "0-15", "gpu": "0-3"}}], "nodelist": ["node0", "node1"]}}
EOF

# Configuration loaded before the resource and fluxion modules are started:
#   [sched-fluxion-resource]  match policy "lonodex", match format
#                             "rv1_nosched"
#   [resource]                take resources from the "R" file written above,
#                             with noverify and norestrict enabled
# NOTE(review): noverify/norestrict presumably let the synthetic two-node R
# be used regardless of the real hardware -- confirm against
# flux-config-resource(5).
# The delimiter here is unquoted, so shell expansion applies to this heredoc;
# the text currently contains nothing that would be expanded.
cat <<EOF >flux.config
[sched-fluxion-resource]
match-policy = "lonodex"
match-format = "rv1_nosched"
[resource]
noverify = true
norestrict = true
path = "R"
EOF

# --- Scenario 1: fluxion with queue-policy=easy ---------------------------
# NOTE(review): this script does not enable `set -e`; unless the test driver
# does, a failing intermediate command will not abort the run.

# Swap the loaded scheduler and resource modules for fluxion, picking up
# the configuration from flux.config.
log "Unloading modules..."
flux module remove sched-simple
flux module remove resource

flux config load flux.config

flux module load resource monitor-force-up
flux module load sched-fluxion-resource
flux module load sched-fluxion-qmanager queue-policy="easy"
flux queue start --all --quiet
# Dump resource and module state so it appears in the test output.
flux resource list
flux resource status
flux module list

# Job 1: two-node job; send a sched.free carrying the job's full R.
log "Running test job 1"
jobid1=$(flux submit -N2 -t 1h --setattr=exec.test.run_duration=1m sleep inf)
log "Sending final RPC for job 1"
flux python ./free.py ${jobid1}
# Need to execute cancel to remove from job manager
flux cancel ${jobid1}
# Wait (timeout 5) for the job's release event.
flux job wait-event -t 5 ${jobid1} release

# Job 2: two-node job; send a sched.free whose R names only rank 1 / node1.
log "Running test job 2"
jobid2=$(flux submit -N2 -t 1h --setattr=exec.test.run_duration=1m sleep inf)
log "Sending final RPC for job 2"
flux python ./incomplete-free.py ${jobid2}
# Need to execute cancel to remove from job manager
flux cancel ${jobid2}
# Wait (timeout 5) for the job's release event, then list all jobs.
flux job wait-event -t 5 ${jobid2} release
flux jobs -a

# Unload fluxion and load sched-simple again.
log "reloading sched-simple..."
flux module remove sched-fluxion-qmanager
flux module remove sched-fluxion-resource
flux module load sched-simple

# --- Scenario 2: fluxion with queue-policy=fcfs ---------------------------
# Same sequence as the easy-policy scenario, repeated with the fcfs queue
# policy: swap sched-simple/resource for fluxion, run one job freed with its
# full R and one freed with a partial R, then put sched-simple back.
log "Unloading modules for FCFS test..."
flux module remove sched-simple
flux module remove resource

flux config load flux.config

flux module load resource monitor-force-up
flux module load sched-fluxion-resource
flux module load sched-fluxion-qmanager queue-policy="fcfs"
flux queue start --all --quiet
# Dump resource and module state so it appears in the test output.
flux resource list
flux resource status
flux module list

# Job 3: two-node job; send a sched.free carrying the job's full R.
log "Running test job 3"
jobid3=$(flux submit -N2 -t 1h --setattr=exec.test.run_duration=1m sleep inf)
log "Sending final RPC for job 3"
flux python ./free.py ${jobid3}
# Need to execute cancel to remove from job manager
flux cancel ${jobid3}
# Wait (timeout 5) for the job's release event.
flux job wait-event -t 5 ${jobid3} release

# Job 4: two-node job; send a sched.free whose R names only rank 1 / node1.
log "Running test job 4"
jobid4=$(flux submit -N2 -t 1h --setattr=exec.test.run_duration=1m sleep inf)
log "Sending final RPC for job 4"
flux python ./incomplete-free.py ${jobid4}
# Need to execute cancel to remove from job manager
flux cancel ${jobid4}
# Wait (timeout 5) for the job's release event, then list all jobs.
flux job wait-event -t 5 ${jobid4} release
flux jobs -a

# Unload fluxion and load sched-simple again; this is the last command, so
# its exit status is the script's exit status.
log "reloading sched-simple..."
flux module remove sched-fluxion-qmanager
flux module remove sched-fluxion-resource
flux module load sched-simple

0 comments on commit d46faea

Please sign in to comment.