diff --git a/articles/slurmjobs.html b/articles/slurmjobs.html index 21d3c62..b6636db 100644 --- a/articles/slurmjobs.html +++ b/articles/slurmjobs.html @@ -85,7 +85,7 @@

Leonardo University
lcolladotor@gmail.com -

10 October 2023

+

12 October 2023

Source: vignettes/slurmjobs.Rmd @@ -206,7 +206,7 @@

Creating Shell Scripts to sbatch job_single( name = "my_shell_script", memory = "10G", cores = 2, create_shell = FALSE ) -#> 2023-10-10 17:29:56.073699 creating the logs directory at: logs +#> 2023-10-12 18:50:09.164475 creating the logs directory at: logs #> #!/bin/bash #> #SBATCH -p shared #> #SBATCH --mem-per-cpu=10G @@ -249,7 +249,7 @@

Creating Shell Scripts to sbatch name = "my_array_job", memory = "5G", cores = 1, create_shell = FALSE, task_num = 10 ) -#> 2023-10-10 17:29:56.172312 creating the logs directory at: logs +#> 2023-10-12 18:50:09.247479 creating the logs directory at: logs #> #!/bin/bash #> #SBATCH -p shared #> #SBATCH --mem-per-cpu=5G @@ -286,86 +286,75 @@

Creating Shell Scripts to sbatch #> #> ## This script was made using slurmjobs version 0.99.0 #> ## available from http://research.libd.org/slurmjobs/ -

job_loop() is a little bit more complicated since you -have to specify the loops named list argument. The -loops argument specifies the bash variable -names and values to loop through for creating a series of -bash scripts that will get submitted to SLURM. This type of -bash script is something we use frequently, for example in -the compute_weights.sh -script (Collado-Torres, Burke, Peterson, Shin, Straub, Rajpurohit, -Semick, Ulrich, Price, Valencia, Tao, Deep-Soboslay, Hyde, Kleinman, -Weinberger, and Jaffe, 2019). This type of script generator I believe is -something Alyssa Frazee taught me -back in the day which you can see in some old repositories such as leekgroup/derSoftware. -Besides the loops argument, job_loop() shares -most of the options with job_single().

+

Another function, job_loop(), can be used to create more +complex array jobs as compared with job_single(). It’s +useful when looping through one or more variables with pre-defined +values, and applying the same processing steps. The key difference is +that rather than specifying task_num, you specify +loops, a named list of variables to loop through. An array +job then gets created that can directly refer to the values of these +variables, rather than referring to just the array’s task ID.

 job_loop(
     loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
     name = "bsp2_test"
 )
-#> #!/bin/bash
-#> 
-#> ## Usage:
-#> # sh bsp2_test.sh
-#> 
-#> ## Create the logs directory
-#> mkdir -p logs
-#> 
-#> for region in DLPFC HIPPO; do
-#>     for feature in gene exon tx jxn; do
-#> 
-#>     ## Internal script name
-#>     SHORT="bsp2_test_${region}_${feature}"
-#> 
-#>     # Construct shell file
-#>     echo "Creating script bsp2_test_${region}_${feature}"
-#>     cat > .${SHORT}.sh <<EOF
-#> #!/bin/bash
-#> #SBATCH -p shared
-#> #SBATCH --mem-per-cpu=10G
-#> #SBATCH --job-name=${SHORT}
-#> #SBATCH -c 1
-#> #SBATCH -o logs/${SHORT}.txt
-#> #SBATCH -e logs/${SHORT}.txt
-#> #SBATCH --mail-type=ALL
-#> 
-#> set -e
-#> 
-#> echo "**** Job starts ****"
-#> date
-#> 
-#> echo "**** JHPCE info ****"
-#> echo "User: \${USER}"
-#> echo "Job id: \${SLURM_JOB_ID}"
-#> echo "Job name: \${SLURM_JOB_NAME}"
-#> echo "Node name: \${SLURMD_NODENAME}"
-#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
-#> 
-#> ## Load the R module
-#> module load conda_R
-#> 
-#> ## List current modules for reproducibility
-#> module list
-#> 
-#> ## Edit with your job command
-#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
-#> 
-#> echo "**** Job ends ****"
-#> date
-#> 
-#> ## This script was made using slurmjobs version 0.99.0
-#> ## available from http://research.libd.org/slurmjobs/
-#> 
-#> 
-#> EOF
-#> 
-#>     call="sbatch .${SHORT}.sh"
-#>     echo $call
-#>     $call
-#>     done
-#> done
+#> [1] "#!/bin/bash" +#> [2] "#SBATCH -p shared" +#> [3] "#SBATCH --mem-per-cpu=10G" +#> [4] "#SBATCH --job-name=bsp2_test" +#> [5] "#SBATCH -c 1" +#> [6] "#SBATCH -o /dev/null" +#> [7] "#SBATCH -e /dev/null" +#> [8] "#SBATCH --mail-type=ALL" +#> [9] "#SBATCH --array=1-8%20" +#> [10] "" +#> [11] "## Define loops and appropriately subset each variable for the array task ID" +#> [12] "all_region=(DLPFC HIPPO)" +#> [13] "region=${all_region[$(( $SLURM_ARRAY_TASK_ID / 4 % 2 ))]}" +#> [14] "" +#> [15] "all_feature=(gene exon tx jxn)" +#> [16] "feature=${all_feature[$(( $SLURM_ARRAY_TASK_ID / 1 % 4 ))]}" +#> [17] "" +#> [18] "## Explicitly pipe script output to a log" +#> [19] "log_path=logs/bsp2_test_${region}_${feature}_${SLURM_ARRAY_TASK_ID}.txt" +#> [20] "" +#> [21] "{" +#> [22] "set -e" +#> [23] "" +#> [24] "echo \"**** Job starts ****\"" +#> [25] "date" +#> [26] "" +#> [27] "echo \"**** JHPCE info ****\"" +#> [28] "echo \"User: ${USER}\"" +#> [29] "echo \"Job id: ${SLURM_JOB_ID}\"" +#> [30] "echo \"Job name: ${SLURM_JOB_NAME}\"" +#> [31] "echo \"Node name: ${SLURMD_NODENAME}\"" +#> [32] "echo \"Task id: ${SLURM_ARRAY_TASK_ID}\"" +#> [33] "" +#> [34] "## Load the R module" +#> [35] "module load conda_R" +#> [36] "" +#> [37] "## List current modules for reproducibility" +#> [38] "module list" +#> [39] "" +#> [40] "## Edit with your job command" +#> [41] "Rscript -e \"options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()\"" +#> [42] "" +#> [43] "echo \"**** Job ends ****\"" +#> [44] "date" +#> [45] "" +#> [46] "} > $log_path 2>&1" +#> [47] "" +#> [48] "## This script was made using slurmjobs version 0.99.0" +#> [49] "## available from http://research.libd.org/slurmjobs/" +#> [50] ""
+

Notice also that logs from executing this shell script get named with +the job name and each of the variables’ values, in addition to the array +task ID. For example, the log for the first task (array task ID 1, which +the index arithmetic in the script maps to region DLPFC and feature exon) would be +logs/bsp2_test_DLPFC_exon_1.txt. Also, the array specifies 8 tasks +total (the product of the number of regions and +features).

Date the vignette was generated.

-
#> [1] "2023-10-10 17:29:58 UTC"
+
#> [1] "2023-10-12 18:50:10 UTC"

Wallclock time spent generating the vignette.

-
#> Time difference of 3.398 secs
+
#> Time difference of 2.278 secs

R session information.

#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 #>  setting  value
@@ -629,7 +618,7 @@ 

Reproducibility#> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz UTC -#> date 2023-10-10 +#> date 2023-10-12 #> pandoc 3.1.1 @ /usr/local/bin/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────── @@ -643,7 +632,6 @@

Reproducibility#> cachem 1.0.8 2023-05-01 [2] RSPM (R 4.3.0) #> cli 3.6.1 2023-03-23 [2] RSPM (R 4.3.0) #> crayon 1.5.2 2022-09-29 [2] RSPM (R 4.3.0) -#> curl 5.1.0 2023-10-02 [2] RSPM (R 4.3.0) #> desc 1.4.2 2022-09-08 [2] RSPM (R 4.3.0) #> digest 0.6.33 2023-07-07 [2] RSPM (R 4.3.0) #> dplyr * 1.1.3 2023-09-03 [1] RSPM (R 4.3.0) @@ -669,7 +657,7 @@

Reproducibility#> plyr 1.8.9 2023-10-02 [1] RSPM (R 4.3.0) #> purrr 1.0.2 2023-08-10 [2] RSPM (R 4.3.0) #> R6 2.5.1 2021-08-19 [2] RSPM (R 4.3.0) -#> ragg 1.2.5 2023-01-12 [2] RSPM (R 4.3.0) +#> ragg 1.2.6 2023-10-10 [2] RSPM (R 4.3.0) #> Rcpp 1.0.11 2023-07-06 [2] RSPM (R 4.3.0) #> RefManageR * 1.4.0 2022-09-30 [1] RSPM (R 4.3.0) #> rlang 1.1.1 2023-04-28 [2] RSPM (R 4.3.0) @@ -677,7 +665,7 @@

Reproducibility#> rprojroot 2.0.3 2022-04-02 [2] RSPM (R 4.3.0) #> sass 0.4.7 2023-07-15 [2] RSPM (R 4.3.0) #> sessioninfo * 1.2.2 2021-12-06 [2] RSPM (R 4.3.0) -#> slurmjobs * 0.99.0 2023-10-10 [1] local +#> slurmjobs * 0.99.0 2023-10-12 [1] local #> stringi 1.7.12 2023-01-11 [2] RSPM (R 4.3.0) #> stringr 1.5.0 2022-12-02 [2] RSPM (R 4.3.0) #> systemfonts 1.0.5 2023-10-09 [2] RSPM (R 4.3.0) diff --git a/pkgdown.yml b/pkgdown.yml index b4b670d..668225a 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -3,5 +3,5 @@ pkgdown: 2.0.7 pkgdown_sha: ~ articles: slurmjobs: slurmjobs.html -last_built: 2023-10-10T17:29Z +last_built: 2023-10-12T18:50Z diff --git a/reference/array_submit.html b/reference/array_submit.html index 38e8651..8c05683 100644 --- a/reference/array_submit.html +++ b/reference/array_submit.html @@ -141,10 +141,10 @@

Examples

submit = FALSE ) }) -#> 2023-10-10 17:29:51.568844 creating the logs directory at: logs -#> 2023-10-10 17:29:51.570559 creating the shell file array_submit_example_2023-10-10.sh -#> To submit the job use: sbatch array_submit_example_2023-10-10.sh -#> [1] "array_submit_example_2023-10-10.sh" +#> 2023-10-12 18:50:05.478047 creating the logs directory at: logs +#> 2023-10-12 18:50:05.479613 creating the shell file array_submit_example_2023-10-12.sh +#> To submit the job use: sbatch array_submit_example_2023-10-12.sh +#> [1] "array_submit_example_2023-10-12.sh"

diff --git a/reference/job_loop.html b/reference/job_loop.html index a760bc1..ce26868 100644 --- a/reference/job_loop.html +++ b/reference/job_loop.html @@ -1,7 +1,6 @@ -Build a bash script that loops over variables and submits SLURM jobs — job_loop • slurmjobsBuild a bash script that loops over variables and submits SLURM jobs — job_loop • slurmjobs @@ -54,9 +53,8 @@

Build a bash script that loops over variables and submits SLURM jobs

-

This function builds a bash script that loops over a set of variables -with pre-specified values to create an internal bash script that then -gets submitted as a SLURM job.

+

This function builds a bash script functioning as an array job that loops +over a set of variables with pre-specified values.

@@ -67,10 +65,9 @@

Build a bash script that loops over variables and submits SLURM jobs

partition = "shared", memory = "10G", cores = 1L, + tc = 20, email = "ALL", - logdir = "logs", - task_num = NULL, - tc = 20 + logdir = "logs" )
@@ -108,6 +105,11 @@

Arguments

your job will request is cores multiplied by memory.

+
tc
+

The maximum number of array tasks allowed to run concurrently +(the value after % in the SLURM --array option).

+ +
email

The email reporting option for the email address ("BEGIN", "END", "FAIL", or "ALL")

@@ -117,16 +119,6 @@

Arguments

The directory for the log files relative to the current working directory.

- -
task_num
-

The number of tasks for your job, which will make it into an -array job. If NULL this is ignored.

- - -
tc
-

If task_num is specified, this option controls the number of -concurrent tasks.

-

Value

@@ -138,8 +130,8 @@

Value

Author

-

Leonardo Collado-Torres

-

Nicholas J. Eagles

+

Nicholas J. Eagles

+

Leonardo Collado-Torres

@@ -147,140 +139,59 @@

Examples


 job_loop(
     loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
-    name = "bsp2_test"
-)
-#> #!/bin/bash
-#> 
-#> ## Usage:
-#> # sh bsp2_test.sh
-#> 
-#> ## Create the logs directory
-#> mkdir -p logs
-#> 
-#> for region in DLPFC HIPPO; do
-#>     for feature in gene exon tx jxn; do
-#> 
-#>     ## Internal script name
-#>     SHORT="bsp2_test_${region}_${feature}"
-#> 
-#>     # Construct shell file
-#>     echo "Creating script bsp2_test_${region}_${feature}"
-#>     cat > .${SHORT}.sh <<EOF
-#> #!/bin/bash
-#> #SBATCH -p shared
-#> #SBATCH --mem-per-cpu=10G
-#> #SBATCH --job-name=${SHORT}
-#> #SBATCH -c 1
-#> #SBATCH -o logs/${SHORT}.txt
-#> #SBATCH -e logs/${SHORT}.txt
-#> #SBATCH --mail-type=ALL
-#> 
-#> set -e
-#> 
-#> echo "**** Job starts ****"
-#> date
-#> 
-#> echo "**** JHPCE info ****"
-#> echo "User: \${USER}"
-#> echo "Job id: \${SLURM_JOB_ID}"
-#> echo "Job name: \${SLURM_JOB_NAME}"
-#> echo "Node name: \${SLURMD_NODENAME}"
-#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
-#> 
-#> ## Load the R module
-#> module load conda_R
-#> 
-#> ## List current modules for reproducibility
-#> module list
-#> 
-#> ## Edit with your job command
-#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
-#> 
-#> echo "**** Job ends ****"
-#> date
-#> 
-#> ## This script was made using slurmjobs version 0.99.0
-#> ## available from http://research.libd.org/slurmjobs/
-#> 
-#> 
-#> EOF
-#> 
-#>     call="sbatch .${SHORT}.sh"
-#>     echo $call
-#>     $call
-#>     done
-#> done
-#> 
-
-job_loop(
-    loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
-    cores = 5,
-    task_num = 10,
-    name = "bsp2_test_array"
+    name = "bsp2_test_array",
+    cores = 2
 )
-#> #!/bin/bash
-#> 
-#> ## Usage:
-#> # sh bsp2_test_array.sh
-#> 
-#> ## Create the logs directory
-#> mkdir -p logs
-#> 
-#> for region in DLPFC HIPPO; do
-#>     for feature in gene exon tx jxn; do
-#> 
-#>     ## Internal script name
-#>     SHORT="bsp2_test_array_${region}_${feature}"
-#> 
-#>     # Construct shell file
-#>     echo "Creating script bsp2_test_array_${region}_${feature}"
-#>     cat > .${SHORT}.sh <<EOF
-#> #!/bin/bash
-#> #SBATCH -p shared
-#> #SBATCH --mem-per-cpu=10G
-#> #SBATCH --job-name=${SHORT}
-#> #SBATCH -c 5
-#> #SBATCH -o logs/${SHORT}.%a.txt
-#> #SBATCH -e logs/${SHORT}.%a.txt
-#> #SBATCH --mail-type=ALL
-#> #SBATCH --array=1-10%20
-#> 
-#> set -e
-#> 
-#> echo "**** Job starts ****"
-#> date
-#> 
-#> echo "**** JHPCE info ****"
-#> echo "User: \${USER}"
-#> echo "Job id: \${SLURM_JOB_ID}"
-#> echo "Job name: \${SLURM_JOB_NAME}"
-#> echo "Node name: \${SLURMD_NODENAME}"
-#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
-#> 
-#> ## Load the R module
-#> module load conda_R
-#> 
-#> ## List current modules for reproducibility
-#> module list
-#> 
-#> ## Edit with your job command
-#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
-#> 
-#> echo "**** Job ends ****"
-#> date
-#> 
-#> ## This script was made using slurmjobs version 0.99.0
-#> ## available from http://research.libd.org/slurmjobs/
-#> 
-#> 
-#> EOF
-#> 
-#>     call="sbatch .${SHORT}.sh"
-#>     echo $call
-#>     $call
-#>     done
-#> done
-#> 
+#>  [1] "#!/bin/bash"                                                                                              
+#>  [2] "#SBATCH -p shared"                                                                                        
+#>  [3] "#SBATCH --mem-per-cpu=10G"                                                                                
+#>  [4] "#SBATCH --job-name=bsp2_test_array"                                                                       
+#>  [5] "#SBATCH -c 2"                                                                                             
+#>  [6] "#SBATCH -o /dev/null"                                                                                     
+#>  [7] "#SBATCH -e /dev/null"                                                                                     
+#>  [8] "#SBATCH --mail-type=ALL"                                                                                  
+#>  [9] "#SBATCH --array=1-8%20"                                                                                   
+#> [10] ""                                                                                                         
+#> [11] "## Define loops and appropriately subset each variable for the array task ID"                             
+#> [12] "all_region=(DLPFC HIPPO)"                                                                                 
+#> [13] "region=${all_region[$(( $SLURM_ARRAY_TASK_ID / 4 % 2 ))]}"                                                
+#> [14] ""                                                                                                         
+#> [15] "all_feature=(gene exon tx jxn)"                                                                           
+#> [16] "feature=${all_feature[$(( $SLURM_ARRAY_TASK_ID / 1 % 4 ))]}"                                              
+#> [17] ""                                                                                                         
+#> [18] "## Explicitly pipe script output to a log"                                                                
+#> [19] "log_path=logs/bsp2_test_array_${region}_${feature}_${SLURM_ARRAY_TASK_ID}.txt"                            
+#> [20] ""                                                                                                         
+#> [21] "{"                                                                                                        
+#> [22] "set -e"                                                                                                   
+#> [23] ""                                                                                                         
+#> [24] "echo \"**** Job starts ****\""                                                                            
+#> [25] "date"                                                                                                     
+#> [26] ""                                                                                                         
+#> [27] "echo \"**** JHPCE info ****\""                                                                            
+#> [28] "echo \"User: ${USER}\""                                                                                   
+#> [29] "echo \"Job id: ${SLURM_JOB_ID}\""                                                                         
+#> [30] "echo \"Job name: ${SLURM_JOB_NAME}\""                                                                     
+#> [31] "echo \"Node name: ${SLURMD_NODENAME}\""                                                                   
+#> [32] "echo \"Task id: ${SLURM_ARRAY_TASK_ID}\""                                                                 
+#> [33] ""                                                                                                         
+#> [34] "## Load the R module"                                                                                     
+#> [35] "module load conda_R"                                                                                      
+#> [36] ""                                                                                                         
+#> [37] "## List current modules for reproducibility"                                                              
+#> [38] "module list"                                                                                              
+#> [39] ""                                                                                                         
+#> [40] "## Edit with your job command"                                                                            
+#> [41] "Rscript -e \"options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()\""
+#> [42] ""                                                                                                         
+#> [43] "echo \"**** Job ends ****\""                                                                              
+#> [44] "date"                                                                                                     
+#> [45] ""                                                                                                         
+#> [46] "} > $log_path 2>&1"                                                                                       
+#> [47] ""                                                                                                         
+#> [48] "## This script was made using slurmjobs version 0.99.0"                                                   
+#> [49] "## available from http://research.libd.org/slurmjobs/"                                                    
+#> [50] ""