diff --git a/README.md b/README.md index f3d4aa4..b312ec0 100644 --- a/README.md +++ b/README.md @@ -116,8 +116,10 @@ $ ls -l cromwell.out* $ caper hpc abort [JOB_ID] ``` +## Customize resource parameters on HPCs + +If the default settings of Caper do not work with your HPC, then see [this document](docs/resource_param.md) to manually customize the resource command line (e.g. `sbatch ... [YOUR_CUSTOM_PARAMETER]`) for your chosen backend. # DETAILS See [details](DETAILS.md). - diff --git a/caper/__init__.py b/caper/__init__.py index dd38127..598fbd4 100644 --- a/caper/__init__.py +++ b/caper/__init__.py @@ -2,4 +2,4 @@ from .caper_runner import CaperRunner __all__ = ['CaperClient', 'CaperClientSubmit', 'CaperRunner'] -__version__ = '2.2.2' +__version__ = '2.2.3' diff --git a/caper/caper_init.py b/caper/caper_init.py index 37c151e..656f1b3 100644 --- a/caper/caper_init.py +++ b/caper/caper_init.py @@ -37,54 +37,15 @@ # It is not recommended to change it unless your cluster has custom resource settings. # See https://github.com/ENCODE-DCC/caper/blob/master/docs/resource_param.md for details.""" -CONF_CONTENTS_SLURM_PARAM = """ -# This parameter defines resource parameters for Caper's leader job only. -slurm-leader-job-resource-param={slurm_leader_job_resource_param} -{help_context} -slurm-resource-param={slurm_resource_param} -""".format( - help_context=CONF_CONTENTS_COMMON_RESOURCE_PARAM_HELP, - slurm_resource_param=CromwellBackendSlurm.DEFAULT_SLURM_RESOURCE_PARAM, - slurm_leader_job_resource_param=' '.join(SlurmWrapper.DEFAULT_LEADER_JOB_RESOURCE_PARAM), -) - +CONF_CONTENTS_SLURM_PARAM = "" CONF_CONTENTS_SGE_PARAM = """ -# This parameter defines resource parameters for Caper's leader job only. -sge-leader-job-resource-param={sge_leader_job_resource_param} - # Parallel environment of SGE: # Find one with `$ qconf -spl` or ask you admin to add one if not exists. 
-# If your cluster works without PE then edit the below sge-resource-param sge-pe= -{help_context} -sge-resource-param={sge_resource_param} -""".format( - help_context=CONF_CONTENTS_COMMON_RESOURCE_PARAM_HELP, - sge_resource_param=CromwellBackendSge.DEFAULT_SGE_RESOURCE_PARAM, - sge_leader_job_resource_param=' '.join(SgeWrapper.DEFAULT_LEADER_JOB_RESOURCE_PARAM), -) - -CONF_CONTENTS_PBS_PARAM = """ -# This parameter defines resource parameters for Caper's leader job only. -pbs-leader-job-resource-param={pbs_leader_job_resource_param} -{help_context} -pbs-resource-param={pbs_resource_param} -""".format( - help_context=CONF_CONTENTS_COMMON_RESOURCE_PARAM_HELP, - pbs_resource_param=CromwellBackendPbs.DEFAULT_PBS_RESOURCE_PARAM, - pbs_leader_job_resource_param=' '.join(PbsWrapper.DEFAULT_LEADER_JOB_RESOURCE_PARAM), -) +""" -CONF_CONTENTS_LSF_PARAM = """ -# This parameter defines resource parameters for Caper's leader job only. -lsf-leader-job-resource-param={lsf_leader_job_resource_param} -{help_context} -lsf-resource-param={lsf_resource_param} -""".format( - help_context=CONF_CONTENTS_COMMON_RESOURCE_PARAM_HELP, - lsf_resource_param=CromwellBackendLsf.DEFAULT_LSF_RESOURCE_PARAM, - lsf_leader_job_resource_param=' '.join(LsfWrapper.DEFAULT_LEADER_JOB_RESOURCE_PARAM), -) +CONF_CONTENTS_PBS_PARAM = "" +CONF_CONTENTS_LSF_PARAM = "" DEFAULT_CONF_CONTENTS_LOCAL = ( """backend=local @@ -109,10 +70,6 @@ DEFAULT_CONF_CONTENTS_SGE = ( """backend=sge - -# Parallel environement is required, ask your administrator to create one -# If your cluster doesn't support PE then edit 'sge-resource-param' -# to fit your cluster's configuration. 
""" + CONF_CONTENTS_TMP_DIR + CONF_CONTENTS_SGE_PARAM diff --git a/docs/resource_param.md b/docs/resource_param.md index 146ffaf..be45a41 100644 --- a/docs/resource_param.md +++ b/docs/resource_param.md @@ -10,3 +10,65 @@ You can use Cromwell's built-in variables (attributes defined in WDL task's runt defined in WDL task's runtime - `time`: Time limit for a job in hour - `gpu`: Specified gpu name or number of gpus (it's declared as String) + +# How to configure resource parameters on HPCs + +Open `~/.caper/default.conf` with a text editor and add the following code lines according to your HPC type. Follow the commented instructions to customize the resource parameters of your HPC's submit/monitor/delete commands. + +## SLURM + +```ini +# This parameter defines resource parameters for Caper's leader job only. +slurm-leader-job-resource-param=-t 48:00:00 --mem 4G + +# This parameter defines resource parameters for submitting WDL task to job engine. +# It is for HPC backends only (slurm, sge, pbs and lsf). +# It is not recommended to change it unless your cluster has custom resource settings. +# See https://github.com/ENCODE-DCC/caper/blob/master/docs/resource_param.md for details. +slurm-resource-param=-n 1 --ntasks-per-node=1 --cpus-per-task=${cpu} ${if defined(memory_mb) then "--mem=" else ""}${memory_mb}${if defined(memory_mb) then "M" else ""} ${if defined(time) then "--time=" else ""}${time*60} ${if defined(gpu) then "--gres=gpu:" else ""}${gpu} + +``` + +## SGE + +```ini +# This parameter defines resource parameters for Caper's leader job only. +sge-leader-job-resource-param=-l h_rt=48:00:00,h_vmem=4G + +# Parallel environment of SGE: +# Find one with `$ qconf -spl` or ask your admin to add one if not exists. +# If your cluster works without PE then edit the below sge-resource-param +sge-pe= + +# This parameter defines resource parameters for submitting WDL task to job engine. +# It is for HPC backends only (slurm, sge, pbs and lsf). 
+# It is not recommended to change it unless your cluster has custom resource settings. +# See https://github.com/ENCODE-DCC/caper/blob/master/docs/resource_param.md for details. +sge-resource-param=${if cpu > 1 then "-pe " + sge_pe + " " else ""} ${if cpu > 1 then cpu else ""} ${true="-l h_vmem=$(expr " false="" defined(memory_mb)}${memory_mb}${true=" / " false="" defined(memory_mb)}${if defined(memory_mb) then cpu else ""}${true=")m" false="" defined(memory_mb)} ${true="-l s_vmem=$(expr " false="" defined(memory_mb)}${memory_mb}${true=" / " false="" defined(memory_mb)}${if defined(memory_mb) then cpu else ""}${true=")m" false="" defined(memory_mb)} ${"-l h_rt=" + time + ":00:00"} ${"-l s_rt=" + time + ":00:00"} ${"-l gpu=" + gpu} +``` + +## PBS + +```ini +# This parameter defines resource parameters for Caper's leader job only. +pbs-leader-job-resource-param=-l walltime=48:00:00,mem=4gb + +# This parameter defines resource parameters for submitting WDL task to job engine. +# It is for HPC backends only (slurm, sge, pbs and lsf). +# It is not recommended to change it unless your cluster has custom resource settings. +# See https://github.com/ENCODE-DCC/caper/blob/master/docs/resource_param.md for details. +pbs-resource-param=${"-lnodes=1:ppn=" + cpu}${if defined(gpu) then ":gpus=" + gpu else ""} ${if defined(memory_mb) then "-l mem=" else ""}${memory_mb}${if defined(memory_mb) then "mb" else ""} ${"-lwalltime=" + time + ":0:0"} +``` + +## LSF + +```ini +# This parameter defines resource parameters for Caper's leader job only. +lsf-leader-job-resource-param=-W 2880 -M 4g + +# This parameter defines resource parameters for submitting WDL task to job engine. +# It is for HPC backends only (slurm, sge, pbs and lsf). +# It is not recommended to change it unless your cluster has custom resource settings. +# See https://github.com/ENCODE-DCC/caper/blob/master/docs/resource_param.md for details. 
+lsf-resource-param=${"-n " + cpu} ${if defined(gpu) then "-gpu " + gpu else ""} ${if defined(memory_mb) then "-M " else ""}${memory_mb}${if defined(memory_mb) then "m" else ""} ${"-W " + 60*time} +```