Skip to content

Commit

Permalink
Merge pull request #95 from eth-cscs/dev
Browse files Browse the repository at this point in the history
Enabling SLURM spank plugin's selection
  • Loading branch information
jpdorsch authored Apr 15, 2021
2 parents 238cb96 + c041e42 commit 5a0c2df
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 8 deletions.
4 changes: 4 additions & 0 deletions deploy/demo/common/common.env
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ F7T_SYSTEMS_INTERNAL_UTILITIES='192.168.220.12:22;192.168.220.12:22'
# Base filesystem where job submission files will be stored.
# ; separated for system
F7T_COMPUTE_BASE_FS="/home;/home"
# enables the use of an additional plugin in the sbatch command (; separated, one value per system)
F7T_USE_SPANK_PLUGIN="True;True"
# option string added to the sbatch command when the plugin is enabled
F7T_SPANK_PLUGIN_OPTION=--nohome
#-------
# Storage:
# public systems to send a job for internal transfer (xfer), must be defined in SYSTEMS_PUBLIC
Expand Down
4 changes: 4 additions & 0 deletions deploy/test-build/environment/common.env
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ F7T_SYSTEMS_INTERNAL_UTILITIES='cluster'
# Base filesystem where job submission files will be stored.
# ; separated for system
F7T_COMPUTE_BASE_FS="/home;/home"
# enables the use of an additional plugin in the sbatch command for each system
F7T_USE_SPANK_PLUGIN="True;True"
# option string added to the sbatch command when the plugin is enabled
F7T_SPANK_PLUGIN_OPTION=--nohome
#-------
# Storage:
# public systems to send a job for internal transfer (xfer), must be defined in SYSTEMS_PUBLIC
Expand Down
2 changes: 1 addition & 1 deletion doc/openapi/firecrest-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ servers:
- url: 'http://FIRECREST_URL'
- url: 'https://FIRECREST_URL'
info:
version: 1.7.3-beta1
version: 1.7.4-beta1
title: FirecREST Developers API
description: >
This API specification is intended for FirecREST developers only. There are some endpoints that are not available in the public version for client developers.
Expand Down
2 changes: 1 addition & 1 deletion doc/openapi/firecrest-developers-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ servers:
- url: 'http://FIRECREST_URL'
- url: 'https://FIRECREST_URL'
info:
version: 1.7.3-beta1
version: 1.7.4-beta1
title: FirecREST API
description: >
FirecREST platform, a RESTful Services Gateway to HPC resources, is a
Expand Down
36 changes: 30 additions & 6 deletions src/compute/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,22 @@
SYSTEMS_PUBLIC = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")
# internal machines to submit/query jobs
SYS_INTERNALS = os.environ.get("F7T_SYSTEMS_INTERNAL_COMPUTE").strip('\'"').split(";")

# Per-system switch for adding the SPANK plugin option to the sbatch command.
# F7T_USE_SPANK_PLUGIN is a ';'-separated list of boolean strings
# (e.g. "True;False"); each entry is converted with get_boolean_var.
USE_SPANK_PLUGIN = os.environ.get("F7T_USE_SPANK_PLUGIN", None)
if USE_SPANK_PLUGIN is not None:
    # strip surrounding quotes, split per system and cast each entry to boolean
    USE_SPANK_PLUGIN = [
        get_boolean_var(flag)
        for flag in USE_SPANK_PLUGIN.strip('\'"').split(";")
    ]
else:
    # if not set, create a list of False values, one for each SYSTEM
    USE_SPANK_PLUGIN = [False] * len(SYS_INTERNALS)

# spank plugin option value, inserted into the sbatch command when enabled.
# Bound unconditionally so the name exists even when F7T_USE_SPANK_PLUGIN
# is not set.
SPANK_PLUGIN_OPTION = os.environ.get("F7T_SPANK_PLUGIN_OPTION", "--nohome")


# Filesystems where to save sbatch files
# F7T_FILESYSTEMS = "/home,/scratch;/home"
FILESYSTEMS = os.environ.get("F7T_FILESYSTEMS").strip('\'"').split(";")
Expand Down Expand Up @@ -114,7 +130,7 @@ def extract_jobid(outline):
return jobid

# copies file and submits with sbatch
def submit_job_task(auth_header, system_name, system_addr, job_file, job_dir, task_id):
def submit_job_task(auth_header, system_name, system_addr, job_file, job_dir, use_plugin, task_id):

try:
# get scopes from token
Expand Down Expand Up @@ -168,7 +184,10 @@ def submit_job_task(auth_header, system_name, system_addr, job_file, job_dir, ta
return

# execute sbatch
action = f"sbatch --chdir={job_dir} {scopes_parameters} -- {job_file['filename']}"

plugin_option = ("" if not use_plugin else SPANK_PLUGIN_OPTION)

action = f"sbatch {plugin_option} --chdir={job_dir} {scopes_parameters} -- {job_file['filename']}"
app.logger.info(action)

retval = exec_remote_command(auth_header, system_name, system_addr, action)
Expand Down Expand Up @@ -280,7 +299,7 @@ def get_slurm_files(auth_header, system_name, system_addr, task_id,job_info,outp
# update_task(task_id, auth_header, async_task.SUCCESS, control_info,True)
return control_info

def submit_job_path_task(auth_header,system_name, system_addr,fileName,job_dir, task_id):
def submit_job_path_task(auth_header,system_name, system_addr,fileName,job_dir, use_plugin, task_id):

try:
# get scopes from token
Expand Down Expand Up @@ -310,8 +329,10 @@ def submit_job_path_task(auth_header,system_name, system_addr,fileName,job_dir,

app.logger.error(e.args)


plugin_option = ("" if not use_plugin else SPANK_PLUGIN_OPTION)

action=f"sbatch --chdir={job_dir} {scopes_parameters} -- {fileName}"
action=f"sbatch {plugin_option} --chdir={job_dir} {scopes_parameters} -- {fileName}"

resp = exec_remote_command(auth_header, system_name, system_addr, action)

Expand Down Expand Up @@ -375,6 +396,7 @@ def submit_job_upload():
# select index in the list corresponding with machine name
system_idx = SYSTEMS_PUBLIC.index(system_name)
system_addr = SYS_INTERNALS[system_idx]


# check if machine is accessible by user:
# exec test remote command
Expand Down Expand Up @@ -428,13 +450,14 @@ def submit_job_upload():
username = get_username(auth_header)

job_dir = f"{job_base_fs}/{username}/firecrest/{tmpdir}"
use_plugin = USE_SPANK_PLUGIN[system_idx]

app.logger.info(f"Job dir: {job_dir}")

try:
# asynchronous task creation
aTask = threading.Thread(target=submit_job_task,
args=(auth_header, system_name, system_addr, job_file, job_dir, task_id))
args=(auth_header, system_name, system_addr, job_file, job_dir, use_plugin, task_id))

aTask.start()
retval = update_task(task_id, auth_header,async_task.QUEUED)
Expand Down Expand Up @@ -470,6 +493,7 @@ def submit_job_path():
# select index in the list corresponding with machine name
system_idx = SYSTEMS_PUBLIC.index(system_name)
system_addr = SYS_INTERNALS[system_idx]
use_plugin = USE_SPANK_PLUGIN[system_idx]

# check if machine is accessible by user:
# exec test remote command
Expand Down Expand Up @@ -525,7 +549,7 @@ def submit_job_path():
try:
# asynchronous task creation
aTask = threading.Thread(target=submit_job_path_task,
args=(auth_header, system_name, system_addr, targetPath, job_dir, task_id))
args=(auth_header, system_name, system_addr, targetPath, job_dir, use_plugin, task_id))

aTask.start()
retval = update_task(task_id, auth_header, async_task.QUEUED, TASKS_URL)
Expand Down

0 comments on commit 5a0c2df

Please sign in to comment.