diff --git a/articles/slurmjobs.html b/articles/slurmjobs.html index 21d3c62..b6636db 100644 --- a/articles/slurmjobs.html +++ b/articles/slurmjobs.html @@ -85,7 +85,7 @@
vignettes/slurmjobs.Rmd
slurmjobs.Rmd
sbatch
job_single(
name = "my_shell_script", memory = "10G", cores = 2, create_shell = FALSE
)
-#> 2023-10-10 17:29:56.073699 creating the logs directory at: logs
+#> 2023-10-12 18:50:09.164475 creating the logs directory at: logs
#> #!/bin/bash
#> #SBATCH -p shared
#> #SBATCH --mem-per-cpu=10G
@@ -249,7 +249,7 @@ Creating Shell Scripts to sbatch
name = "my_array_job", memory = "5G", cores = 1, create_shell = FALSE,
task_num = 10
)
-#> 2023-10-10 17:29:56.172312 creating the logs directory at: logs
+#> 2023-10-12 18:50:09.247479 creating the logs directory at: logs
#> #!/bin/bash
#> #SBATCH -p shared
#> #SBATCH --mem-per-cpu=5G
@@ -286,86 +286,75 @@ Creating Shell Scripts to sbatch
#>
#> ## This script was made using slurmjobs version 0.99.0
#> ## available from http://research.libd.org/slurmjobs/
-
job_loop()
is a little bit more complicated since you
-have to specify the loops
named list argument. The
-loops
argument specifies the bash
variable
-names and values to loop through for creating a series of
-bash
scripts that will get submitted to SLURM. This type of
-bash
script is something we use frequently, for example in
-the compute_weights.sh
-script (Collado-Torres, Burke, Peterson, Shin, Straub, Rajpurohit,
-Semick, Ulrich, Price, Valencia, Tao, Deep-Soboslay, Hyde, Kleinman,
-Weinberger, and Jaffe, 2019). This type of script generator I believe is
-something Alyssa Frazee taught me
-back in the day which you can see in some old repositories such as leekgroup/derSoftware
.
-Besides the loops
argument, job_loop()
shares
-most of the options with job_single()
.
+Another function, job_loop()
, can be used to create more
+complex array jobs as compared with job_single()
. It’s
+useful when looping through one or more variables with pre-defined
+values, and applying the same processing steps. The key difference is
+that rather than specifying task_num
, you specify
+loops
, a named list of variables to loop through. An array
+job then gets created that can directly refer to the values of these
+variables, rather than referring to just the array’s task ID.
job_loop(
loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
name = "bsp2_test"
)
-#> #!/bin/bash
-#>
-#> ## Usage:
-#> # sh bsp2_test.sh
-#>
-#> ## Create the logs directory
-#> mkdir -p logs
-#>
-#> for region in DLPFC HIPPO; do
-#> for feature in gene exon tx jxn; do
-#>
-#> ## Internal script name
-#> SHORT="bsp2_test_${region}_${feature}"
-#>
-#> # Construct shell file
-#> echo "Creating script bsp2_test_${region}_${feature}"
-#> cat > .${SHORT}.sh <<EOF
-#> #!/bin/bash
-#> #SBATCH -p shared
-#> #SBATCH --mem-per-cpu=10G
-#> #SBATCH --job-name=${SHORT}
-#> #SBATCH -c 1
-#> #SBATCH -o logs/${SHORT}.txt
-#> #SBATCH -e logs/${SHORT}.txt
-#> #SBATCH --mail-type=ALL
-#>
-#> set -e
-#>
-#> echo "**** Job starts ****"
-#> date
-#>
-#> echo "**** JHPCE info ****"
-#> echo "User: \${USER}"
-#> echo "Job id: \${SLURM_JOB_ID}"
-#> echo "Job name: \${SLURM_JOB_NAME}"
-#> echo "Node name: \${SLURMD_NODENAME}"
-#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
-#>
-#> ## Load the R module
-#> module load conda_R
-#>
-#> ## List current modules for reproducibility
-#> module list
-#>
-#> ## Edit with your job command
-#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
-#>
-#> echo "**** Job ends ****"
-#> date
-#>
-#> ## This script was made using slurmjobs version 0.99.0
-#> ## available from http://research.libd.org/slurmjobs/
-#>
-#>
-#> EOF
-#>
-#> call="sbatch .${SHORT}.sh"
-#> echo $call
-#> $call
-#> done
-#> done
+#> [1] "#!/bin/bash"
+#> [2] "#SBATCH -p shared"
+#> [3] "#SBATCH --mem-per-cpu=10G"
+#> [4] "#SBATCH --job-name=bsp2_test"
+#> [5] "#SBATCH -c 1"
+#> [6] "#SBATCH -o /dev/null"
+#> [7] "#SBATCH -e /dev/null"
+#> [8] "#SBATCH --mail-type=ALL"
+#> [9] "#SBATCH --array=1-8%20"
+#> [10] ""
+#> [11] "## Define loops and appropriately subset each variable for the array task ID"
+#> [12] "all_region=(DLPFC HIPPO)"
+#> [13] "region=${all_region[$(( $SLURM_ARRAY_TASK_ID / 4 % 2 ))]}"
+#> [14] ""
+#> [15] "all_feature=(gene exon tx jxn)"
+#> [16] "feature=${all_feature[$(( $SLURM_ARRAY_TASK_ID / 1 % 4 ))]}"
+#> [17] ""
+#> [18] "## Explicitly pipe script output to a log"
+#> [19] "log_path=logs/bsp2_test_${region}_${feature}_${SLURM_ARRAY_TASK_ID}.txt"
+#> [20] ""
+#> [21] "{"
+#> [22] "set -e"
+#> [23] ""
+#> [24] "echo \"**** Job starts ****\""
+#> [25] "date"
+#> [26] ""
+#> [27] "echo \"**** JHPCE info ****\""
+#> [28] "echo \"User: ${USER}\""
+#> [29] "echo \"Job id: ${SLURM_JOB_ID}\""
+#> [30] "echo \"Job name: ${SLURM_JOB_NAME}\""
+#> [31] "echo \"Node name: ${SLURMD_NODENAME}\""
+#> [32] "echo \"Task id: ${SLURM_ARRAY_TASK_ID}\""
+#> [33] ""
+#> [34] "## Load the R module"
+#> [35] "module load conda_R"
+#> [36] ""
+#> [37] "## List current modules for reproducibility"
+#> [38] "module list"
+#> [39] ""
+#> [40] "## Edit with your job command"
+#> [41] "Rscript -e \"options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()\""
+#> [42] ""
+#> [43] "echo \"**** Job ends ****\""
+#> [44] "date"
+#> [45] ""
+#> [46] "} > $log_path 2>&1"
+#> [47] ""
+#> [48] "## This script was made using slurmjobs version 0.99.0"
+#> [49] "## available from http://research.libd.org/slurmjobs/"
+#> [50] ""
+
Notice also that logs from executing this shell script get named with
+each of the variables’ values in addition to the array task ID. For
+example, the log for the first task would be
+logs/bsp2_test_DLPFC_gene_1.txt
. Also, the array specifies 8 tasks
+total (the product of the number of region
s and
+feature
s).
Submitting and Resubmitting Jobs
@@ -388,8 +377,8 @@ Submitting and Resubmitting Jobs name = "my_array_job", memory = "5G", cores = 1, create_shell = TRUE,
task_num = 10
)
-#> 2023-10-10 17:29:57.081484 creating the logs directory at: logs
-#> 2023-10-10 17:29:57.083036 creating the shell file my_array_job.sh
+#> 2023-10-12 18:50:09.408435 creating the logs directory at: logs
+#> 2023-10-12 18:50:09.409813 creating the shell file my_array_job.sh
#> To submit the job use: sbatch my_array_job.sh
# Suppose that tasks 3, 6, 7, and 8 failed
@@ -615,9 +604,9 @@ Reproducibilitylibrary("knitr")
knit("slurmjobs.Rmd", tangle = TRUE)
Date the vignette was generated.
-#> [1] "2023-10-10 17:29:58 UTC"
+#> [1] "2023-10-12 18:50:10 UTC"
Wallclock time spent generating the vignette.
-#> Time difference of 3.398 secs
+#> Time difference of 2.278 secs
R
session information.
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#> setting value
@@ -629,7 +618,7 @@ Reproducibility#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz UTC
-#> date 2023-10-10
+#> date 2023-10-12
#> pandoc 3.1.1 @ /usr/local/bin/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
@@ -643,7 +632,6 @@ Reproducibility#> cachem 1.0.8 2023-05-01 [2] RSPM (R 4.3.0)
#> cli 3.6.1 2023-03-23 [2] RSPM (R 4.3.0)
#> crayon 1.5.2 2022-09-29 [2] RSPM (R 4.3.0)
-#> curl 5.1.0 2023-10-02 [2] RSPM (R 4.3.0)
#> desc 1.4.2 2022-09-08 [2] RSPM (R 4.3.0)
#> digest 0.6.33 2023-07-07 [2] RSPM (R 4.3.0)
#> dplyr * 1.1.3 2023-09-03 [1] RSPM (R 4.3.0)
@@ -669,7 +657,7 @@ Reproducibility#> plyr 1.8.9 2023-10-02 [1] RSPM (R 4.3.0)
#> purrr 1.0.2 2023-08-10 [2] RSPM (R 4.3.0)
#> R6 2.5.1 2021-08-19 [2] RSPM (R 4.3.0)
-#> ragg 1.2.5 2023-01-12 [2] RSPM (R 4.3.0)
+#> ragg 1.2.6 2023-10-10 [2] RSPM (R 4.3.0)
#> Rcpp 1.0.11 2023-07-06 [2] RSPM (R 4.3.0)
#> RefManageR * 1.4.0 2022-09-30 [1] RSPM (R 4.3.0)
#> rlang 1.1.1 2023-04-28 [2] RSPM (R 4.3.0)
@@ -677,7 +665,7 @@ Reproducibility#> rprojroot 2.0.3 2022-04-02 [2] RSPM (R 4.3.0)
#> sass 0.4.7 2023-07-15 [2] RSPM (R 4.3.0)
#> sessioninfo * 1.2.2 2021-12-06 [2] RSPM (R 4.3.0)
-#> slurmjobs * 0.99.0 2023-10-10 [1] local
+#> slurmjobs * 0.99.0 2023-10-12 [1] local
#> stringi 1.7.12 2023-01-11 [2] RSPM (R 4.3.0)
#> stringr 1.5.0 2022-12-02 [2] RSPM (R 4.3.0)
#> systemfonts 1.0.5 2023-10-09 [2] RSPM (R 4.3.0)
diff --git a/pkgdown.yml b/pkgdown.yml
index b4b670d..668225a 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -3,5 +3,5 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
slurmjobs: slurmjobs.html
-last_built: 2023-10-10T17:29Z
+last_built: 2023-10-12T18:50Z
diff --git a/reference/array_submit.html b/reference/array_submit.html
index 38e8651..8c05683 100644
--- a/reference/array_submit.html
+++ b/reference/array_submit.html
@@ -141,10 +141,10 @@ Examples
submit = FALSE
)
})
-#> 2023-10-10 17:29:51.568844 creating the logs directory at: logs
-#> 2023-10-10 17:29:51.570559 creating the shell file array_submit_example_2023-10-10.sh
-#> To submit the job use: sbatch array_submit_example_2023-10-10.sh
-#> [1] "array_submit_example_2023-10-10.sh"
+#> 2023-10-12 18:50:05.478047 creating the logs directory at: logs
+#> 2023-10-12 18:50:05.479613 creating the shell file array_submit_example_2023-10-12.sh
+#> To submit the job use: sbatch array_submit_example_2023-10-12.sh
+#> [1] "array_submit_example_2023-10-12.sh"
diff --git a/reference/job_loop.html b/reference/job_loop.html
index a760bc1..ce26868 100644
--- a/reference/job_loop.html
+++ b/reference/job_loop.html
@@ -1,7 +1,6 @@
-Build a bash script that loops over variables and submits SLURM jobs — job_loop • slurmjobs Build a bash script that loops over variables and submits SLURM jobs — job_loop • slurmjobs
@@ -54,9 +53,8 @@ Build a bash script that loops over variables and submits SLURM jobs
- This function builds a bash script that loops over a set of variables
-with pre-specified values to create an internal bash script that then
-gets submitted as a SLURM job.
+ This function builds a bash script functioning as an array job that loops
+over a set of variables with pre-specified values.
@@ -67,10 +65,9 @@ Build a bash script that loops over variables and submits SLURM jobs
partition = "shared",
memory = "10G",
cores = 1L,
+ tc = 20,
email = "ALL",
- logdir = "logs",
- task_num = NULL,
- tc = 20
+ logdir = "logs"
)
@@ -108,6 +105,11 @@ Arguments
your job will request is cores
multiplied by memory
.
+tc
+This option controls the maximum number of
+concurrent array tasks.
+
+
+
email
The email reporting option for the email address ("BEGIN",
"END", "FAIL", or "ALL")
@@ -117,16 +119,6 @@ Arguments
The directory for the log files relative to the current
working directory.
-
-task_num
-The number of tasks for your job, which will make it into an
-array job. If NULL
this is ignored.
-
-
-tc
-If task_num
is specified, this option controls the number of
-concurrent tasks.
-
Value
@@ -138,8 +130,8 @@ Value
@@ -147,140 +139,59 @@ Examples
job_loop(
loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
- name = "bsp2_test"
-)
-#> #!/bin/bash
-#>
-#> ## Usage:
-#> # sh bsp2_test.sh
-#>
-#> ## Create the logs directory
-#> mkdir -p logs
-#>
-#> for region in DLPFC HIPPO; do
-#> for feature in gene exon tx jxn; do
-#>
-#> ## Internal script name
-#> SHORT="bsp2_test_${region}_${feature}"
-#>
-#> # Construct shell file
-#> echo "Creating script bsp2_test_${region}_${feature}"
-#> cat > .${SHORT}.sh <<EOF
-#> #!/bin/bash
-#> #SBATCH -p shared
-#> #SBATCH --mem-per-cpu=10G
-#> #SBATCH --job-name=${SHORT}
-#> #SBATCH -c 1
-#> #SBATCH -o logs/${SHORT}.txt
-#> #SBATCH -e logs/${SHORT}.txt
-#> #SBATCH --mail-type=ALL
-#>
-#> set -e
-#>
-#> echo "**** Job starts ****"
-#> date
-#>
-#> echo "**** JHPCE info ****"
-#> echo "User: \${USER}"
-#> echo "Job id: \${SLURM_JOB_ID}"
-#> echo "Job name: \${SLURM_JOB_NAME}"
-#> echo "Node name: \${SLURMD_NODENAME}"
-#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
-#>
-#> ## Load the R module
-#> module load conda_R
-#>
-#> ## List current modules for reproducibility
-#> module list
-#>
-#> ## Edit with your job command
-#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
-#>
-#> echo "**** Job ends ****"
-#> date
-#>
-#> ## This script was made using slurmjobs version 0.99.0
-#> ## available from http://research.libd.org/slurmjobs/
-#>
-#>
-#> EOF
-#>
-#> call="sbatch .${SHORT}.sh"
-#> echo $call
-#> $call
-#> done
-#> done
-#>
-
-job_loop(
- loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
- cores = 5,
- task_num = 10,
- name = "bsp2_test_array"
+ name = "bsp2_test_array",
+ cores = 2
)
-#> #!/bin/bash
-#>
-#> ## Usage:
-#> # sh bsp2_test_array.sh
-#>
-#> ## Create the logs directory
-#> mkdir -p logs
-#>
-#> for region in DLPFC HIPPO; do
-#> for feature in gene exon tx jxn; do
-#>
-#> ## Internal script name
-#> SHORT="bsp2_test_array_${region}_${feature}"
-#>
-#> # Construct shell file
-#> echo "Creating script bsp2_test_array_${region}_${feature}"
-#> cat > .${SHORT}.sh <<EOF
-#> #!/bin/bash
-#> #SBATCH -p shared
-#> #SBATCH --mem-per-cpu=10G
-#> #SBATCH --job-name=${SHORT}
-#> #SBATCH -c 5
-#> #SBATCH -o logs/${SHORT}.%a.txt
-#> #SBATCH -e logs/${SHORT}.%a.txt
-#> #SBATCH --mail-type=ALL
-#> #SBATCH --array=1-10%20
-#>
-#> set -e
-#>
-#> echo "**** Job starts ****"
-#> date
-#>
-#> echo "**** JHPCE info ****"
-#> echo "User: \${USER}"
-#> echo "Job id: \${SLURM_JOB_ID}"
-#> echo "Job name: \${SLURM_JOB_NAME}"
-#> echo "Node name: \${SLURMD_NODENAME}"
-#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
-#>
-#> ## Load the R module
-#> module load conda_R
-#>
-#> ## List current modules for reproducibility
-#> module list
-#>
-#> ## Edit with your job command
-#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
-#>
-#> echo "**** Job ends ****"
-#> date
-#>
-#> ## This script was made using slurmjobs version 0.99.0
-#> ## available from http://research.libd.org/slurmjobs/
-#>
-#>
-#> EOF
-#>
-#> call="sbatch .${SHORT}.sh"
-#> echo $call
-#> $call
-#> done
-#> done
-#>
+#> [1] "#!/bin/bash"
+#> [2] "#SBATCH -p shared"
+#> [3] "#SBATCH --mem-per-cpu=10G"
+#> [4] "#SBATCH --job-name=bsp2_test_array"
+#> [5] "#SBATCH -c 2"
+#> [6] "#SBATCH -o /dev/null"
+#> [7] "#SBATCH -e /dev/null"
+#> [8] "#SBATCH --mail-type=ALL"
+#> [9] "#SBATCH --array=1-8%20"
+#> [10] ""
+#> [11] "## Define loops and appropriately subset each variable for the array task ID"
+#> [12] "all_region=(DLPFC HIPPO)"
+#> [13] "region=${all_region[$(( $SLURM_ARRAY_TASK_ID / 4 % 2 ))]}"
+#> [14] ""
+#> [15] "all_feature=(gene exon tx jxn)"
+#> [16] "feature=${all_feature[$(( $SLURM_ARRAY_TASK_ID / 1 % 4 ))]}"
+#> [17] ""
+#> [18] "## Explicitly pipe script output to a log"
+#> [19] "log_path=logs/bsp2_test_array_${region}_${feature}_${SLURM_ARRAY_TASK_ID}.txt"
+#> [20] ""
+#> [21] "{"
+#> [22] "set -e"
+#> [23] ""
+#> [24] "echo \"**** Job starts ****\""
+#> [25] "date"
+#> [26] ""
+#> [27] "echo \"**** JHPCE info ****\""
+#> [28] "echo \"User: ${USER}\""
+#> [29] "echo \"Job id: ${SLURM_JOB_ID}\""
+#> [30] "echo \"Job name: ${SLURM_JOB_NAME}\""
+#> [31] "echo \"Node name: ${SLURMD_NODENAME}\""
+#> [32] "echo \"Task id: ${SLURM_ARRAY_TASK_ID}\""
+#> [33] ""
+#> [34] "## Load the R module"
+#> [35] "module load conda_R"
+#> [36] ""
+#> [37] "## List current modules for reproducibility"
+#> [38] "module list"
+#> [39] ""
+#> [40] "## Edit with your job command"
+#> [41] "Rscript -e \"options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()\""
+#> [42] ""
+#> [43] "echo \"**** Job ends ****\""
+#> [44] "date"
+#> [45] ""
+#> [46] "} > $log_path 2>&1"
+#> [47] ""
+#> [48] "## This script was made using slurmjobs version 0.99.0"
+#> [49] "## available from http://research.libd.org/slurmjobs/"
+#> [50] ""