diff --git a/.Rbuildignore b/.Rbuildignore index 5b3bdae..b14d5a7 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,4 @@ ^README\.Rmd$ ^\.github$ ^codecov\.yml$ +^data-raw$ diff --git a/DESCRIPTION b/DESCRIPTION index bab4e91..77922b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,3 +31,6 @@ Imports: stringr, purrr, utils +Depends: + R (>= 4.1.0) +LazyData: true diff --git a/dev/interactive_testing/clean_job_info.R b/data-raw/job_info_df.R similarity index 55% rename from dev/interactive_testing/clean_job_info.R rename to data-raw/job_info_df.R index 66a0d7a..0b0050f 100644 --- a/dev/interactive_testing/clean_job_info.R +++ b/data-raw/job_info_df.R @@ -1,29 +1,26 @@ -library(here) library(dplyr) -library(stringr) -source(here('R', 'job_info.R')) # Randomly grab 100 jobs running now on the 'shared' partition -job_df = job_info(user = NULL) |> +job_info_df = slurmjobs::job_info(user = NULL) |> sample_n(size = 100) |> arrange(job_id) # A vector whose values are anonymous usernames and whose names are the # original usernames -user_map = paste0('user', 1:length(unique(job_df$user))) -names(user_map) = unique(job_df$user) +user_map = paste0('user', seq_along(unique(job_info_df$user))) +names(user_map) = unique(job_info_df$user) # Similarly for job names, though we'll keep the generic name for interactive # jobs ('bash') -name_map = paste0('my_job_', 1:length(unique(job_df$name))) -names(name_map) = unique(job_df$name) +name_map = paste0('my_job_', seq_along(unique(job_info_df$name))) +names(name_map) = unique(job_info_df$name) name_map['bash'] = 'bash' -# Anonymize username and job name -job_df = job_df |> +# Anonymize user/job names; unname() drops the originals carried as names +job_info_df = job_info_df |> mutate( - user = user_map[user], - name = name_map[name] + user = unname(user_map[user]), + name = unname(name_map[name]) ) -saveRDS(job_df, here('inst', 'extdata', 'job_info_df.rds')) +usethis::use_data(job_info_df, overwrite = TRUE) diff --git a/data/job_info_df.rda b/data/job_info_df.rda new file mode 100644 index 0000000..8cc5126 Binary files /dev/null and b/data/job_info_df.rda differ diff --git 
a/vignettes/slurmjobs.Rmd b/vignettes/slurmjobs.Rmd index 7ffa12b..40a71b4 100644 --- a/vignettes/slurmjobs.Rmd +++ b/vignettes/slurmjobs.Rmd @@ -178,22 +178,19 @@ array_submit("my_array_job.sh", submit = FALSE) The `job_info()` function provides wrappers around the `squeue` and `sstat` utilities SLURM provides for monitoring specific jobs and how busy partitions are. The general idea is to provide the information output from `squeue` into a `tibble`, while retrieving memory-utilization information that ordinarily must be retrieved manually on a job-by-job basis with `sstat -j [specific job ID]`. -On a SLURM system, you'd run `job_df = job_info(user = NULL, partition = "shared")` here, to get every user's jobs running on the "shared" partition. We'll load an example output directly here. +On a SLURM system, you'd run `job_info_df = job_info(user = NULL, partition = "shared")` here, to get every user's jobs running on the "shared" partition. Here, we'll instead use the example output that ships with the package as the `job_info_df` dataset. ```{r "job_info_quick_look"} -# On a real SLURM system -job_df <- readRDS( - system.file("extdata", "job_info_df.rds", package = "slurmjobs") -) -print(job_df) +# Example data bundled with the package; on a real SLURM system, use job_info() +print(job_info_df) ``` The benefit to having this data in R, now, is to be able to trivially ask summarizing questions. First, "how much memory and how many CPUs am I currently using?" Knowing this answer can help ensure fair and civil use of shared computing resources, for example on a computing cluster. ```{r "job_info_total_resources"} -job_df |> +job_info_df |> # Or your username here - filter(user == "user17") |> + filter(user == "user21") |> # Get the number of CPUs requested and the memory requested in GB summarize( total_mem_req = sum(requested_mem_gb),