Skip to content

Commit

Permalink
submit_prod threshold to 4500 and remove Slurm env vars
Browse files Browse the repository at this point in the history
  • Loading branch information
akremin committed Aug 16, 2024
1 parent 2adb960 commit 6bae2ae
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 5 deletions.
4 changes: 2 additions & 2 deletions bin/desi_submit_prod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# coding: utf-8
# -*- coding: utf-8 -*-

import argparse

Expand All @@ -14,7 +14,7 @@ def parse_args(): # options=None):

parser.add_argument("-p", "--production-yaml", type=str, required=True,
help="Relative or absolute pathname to the yaml file summarizing the production.")
parser.add_argument("-q", "--queue-threshold", type=int, default=4800,
parser.add_argument("-q", "--queue-threshold", type=int, default=4500,
help="The number of jobs for the current user in the queue at which the"
+ " at which the script stops submitting new jobs.")

Expand Down
14 changes: 11 additions & 3 deletions py/desispec/scripts/submit_prod.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
from desispec.io import findfile
from desispec.scripts.proc_night import proc_night
## Import some helper functions, you can see their definitions by uncommenting the bash shell command
from desispec.workflow.utils import verify_variable_with_environment, listpath
from desispec.workflow.utils import verify_variable_with_environment, listpath, \
remove_slurm_environment_variables
# TODO when merged into branch with crossnight dependencies this is in workflow.exptable as the new name
# from desispec.workflow.exptable import read_minimal_science_exptab_cols
from desispec.workflow.redshifts import read_minimal_exptables_columns as read_minimal_science_exptab_cols
from desispec.scripts.submit_night import submit_night
from desispec.workflow.queue import check_queue_count
Expand Down Expand Up @@ -130,7 +132,7 @@ def get_nights_to_process(production_yaml, verbose=False):
return sorted(all_nights)


def submit_production(production_yaml, queue_threshold=4800, dry_run_level=False):
def submit_production(production_yaml, queue_threshold=4500, dry_run_level=False):
"""
Interprets a production_yaml file and submits the respective nights for processing
within the defined production.
Expand All @@ -152,17 +154,23 @@ def submit_production(production_yaml, queue_threshold=4800, dry_run_level=False
with open(production_yaml, 'rb') as yamlfile:
conf = yaml.safe_load(yamlfile)

## Unset Slurm environment variables set when running in scrontab
remove_slurm_environment_variables()

## Make sure the specprod matches, if not set it to that in the file
if 'SPECPROD' not in conf:
raise ValueError(f"SPECPROD required in yaml file {production_yaml}")
specprod = str(conf['SPECPROD']).lower()
specprod = verify_variable_with_environment(var=specprod, var_name='specprod',
env_name='SPECPROD')

## Define the user
user = os.environ['USER']

## Look for sentinel
sentinel_file = os.path.join(os.environ['DESI_SPECTRO_REDUX'],
os.environ['SPECPROD'], 'run', 'sentinel.txt')
os.environ['SPECPROD'], 'run',
'prod_submission_complete.txt')
if os.path.exists(sentinel_file):
log.info(f"Sentinel file {sentinel_file} exists, therefore all "
+ f"nights already submitted.")
Expand Down
5 changes: 5 additions & 0 deletions py/desispec/workflow/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,11 @@ def get_jobs_in_queue(user=None, include_scron=False, dry_run_level=0):
NODELIST(REASON) for the specified user.
"""
log = get_logger()
if user is None:
if 'USER' in os.environ:
user = os.environ['USER']
else:
user = 'desi'

cmd = f'squeue -u {user} -o "%i,%P,%j,%u,%t,%M,%D,%R"'
cmd_as_list = cmd.split()
Expand Down
11 changes: 11 additions & 0 deletions py/desispec/workflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,14 @@ def sleep_and_report(sleep_duration=0.1, message_suffix="", logfunc=None, dry_ru
time.sleep(sleep_duration)
logfunc(f"Resuming...")
logfunc("\n\n")

def remove_slurm_environment_variables():
    """
    Delete SLURM_MEM_PER_CPU and SLURM_OPEN_MODE from os.environ.

    Variables that are not currently set are skipped. Every variable that
    is removed is reported through the logger at the info level before it
    is deleted. Nothing is returned; os.environ is modified in place.
    """
    log = get_logger()
    slurm_vars = ('SLURM_MEM_PER_CPU', 'SLURM_OPEN_MODE')
    for var in slurm_vars:
        ## Nothing to do for variables that are not set
        if var not in os.environ:
            continue
        message = f"Removing Slurm variable {var} from the environment" \
                  + " before running."
        log.info(message)
        del os.environ[var]

0 comments on commit 6bae2ae

Please sign in to comment.