diff --git a/404.html b/404.html index fa12898..67625b4 100644 --- a/404.html +++ b/404.html @@ -24,7 +24,7 @@ dv.loader - 2.0.0 + 2.1.0 diff --git a/LICENSE-text.html b/LICENSE-text.html index 145debc..8e495a2 100644 --- a/LICENSE-text.html +++ b/LICENSE-text.html @@ -10,7 +10,7 @@ dv.loader - 2.0.0 + 2.1.0 diff --git a/articles/index.html b/articles/index.html index 7e3abcf..97d7de6 100644 --- a/articles/index.html +++ b/articles/index.html @@ -10,7 +10,7 @@ dv.loader - 2.0.0 + 2.1.0 diff --git a/articles/loading-data-into-memory.html b/articles/loading-data-into-memory.html index 57362c1..2c79625 100644 --- a/articles/loading-data-into-memory.html +++ b/articles/loading-data-into-memory.html @@ -26,7 +26,7 @@ dv.loader - 2.0.0 + 2.1.0 @@ -76,46 +76,139 @@ +The dv.loader package simplifies the process of loading data files into R memory. It provides two main functions - load_data() and load_files() - that can handle two widely used data formats: + + +.rds files: R’s native data storage format, which efficiently stores R objects in a compressed binary format + +.sas7bdat files: SAS dataset files commonly used in clinical research and other industries + +The package is designed to be flexible, allowing you to load data either from a centralized location using environment variables, or by specifying explicit file paths. Each loaded dataset includes metadata about the source file, such as its size, modification time, and location on disk. +To demonstrate the package’s capabilities, we’ll first create some example .rds files in a temporary directory that we can work with. +# Create a temporary directory for the example data +temp_dir <- tempdir() + +# Save the cars and mtcars datasets to the temporary directory +saveRDS(cars, file = file.path(temp_dir, "cars.rds")) +saveRDS(mtcars, file = file.path(temp_dir, "mtcars.rds")) +To begin, we’ll need to load the dv.loader package. + library(dv.loader) -Note: use_wd = TRUE can be used to source from local folder. 
Just set your working directory before using with setwd(), or use an explicit path in sub_dir. -Usage: load_data() +Using load_data() - -test_data_path <- "../tests/testthat/inst/extdata" -data_list <- load_data( - sub_dir = test_data_path, - file_names = "dummyads2", - use_wd = TRUE -) - +The load_data() function requires the RXD_DATA environment variable to be set to the base directory containing your data files. This variable defines the root path from which subdirectories will be searched. +When you call load_data(), it searches the specified subdirectory for data files and returns them as a named list of data frames. Each data frame in the list is named after its source file. +For files that exist in both .rds and .sas7bdat formats, load_data() will load the .rds version by default since these are more compact and faster to read. You can override this behavior by setting prefer_sas = TRUE to prioritize loading .sas7bdat files instead. + +# Set the RXD_DATA environment variable to the temporary directory +Sys.setenv(RXD_DATA = temp_dir) -class(data_list) -#> [1] "list" +# Load the data files into a named list of data frames +data_list1 <- load_data( + sub_dir = ".", + file_names = c("cars", "mtcars") +) -class(data_list[["dummyads2"]]) -#> [1] "tbl_df" "tbl" "data.frame" +# Display the structure of the resulting list +str(data_list1) +#> List of 2 +#> $ cars :'data.frame': 50 obs. of 2 variables: +#> ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ... +#> ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 289 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./cars.rds" +#> .. 
..$ file_name: chr "cars.rds" +#> $ mtcars:'data.frame': 32 obs. of 11 variables: +#> ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... +#> ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... +#> ..$ disp: num [1:32] 160 160 108 258 360 ... +#> ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... +#> ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... +#> ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... +#> ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... +#> ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... +#> ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... +#> ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... +#> ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 1225 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./mtcars.rds" +#> .. ..$ file_name: chr "mtcars.rds" + + +Using load_files() + +The load_files() function accepts explicit file paths and loads them into a named list of data frames. Each data frame includes metadata as an attribute. If no custom names are provided, the function will use the file names (without paths or extensions) as the list names. 
+ +# Load the data files into a named list of data frames +data_list2 <- load_files( + file_paths = c( + file.path(temp_dir, "cars.rds"), + file.path(temp_dir, "mtcars.rds") + ) +) -head(data_list[["dummyads2"]]) -#> STUDYID USUBJID SUBJID TRTFL PPROTFL TRT01PNDC TRT01PN -#> 1 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 2 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 3 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 4 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 5 test-0001 test-0001-102 102 Y Y Placebo 2000 -#> 6 test-0001 test-0001-102 102 Y Y Placebo 2000 -Get the dataframe’s metadata through its attributes: - -attr(data_list[["dummyads2"]], "meta") -#> size isdir mode mtime ctime atime -#> 1 449 FALSE 644 2024-07-08 05:22:03 2024-07-08 05:22:03 2024-07-08 05:22:03 -#> path -#> 1 /__w/dv.loader/dv.loader/vignettes/../tests/testthat/inst/extdata/dummyads2.RDS -#> file_name -#> 1 dummyads2 +# Display the structure of the resulting list +str(data_list2) +#> List of 2 +#> $ cars :'data.frame': 50 obs. of 2 variables: +#> ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ... +#> ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 289 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/cars.rds" +#> .. ..$ file_name: chr "cars.rds" +#> $ mtcars:'data.frame': 32 obs. of 11 variables: +#> ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... +#> ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... +#> ..$ disp: num [1:32] 160 160 108 258 360 ... +#> ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... +#> ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... 
+#> ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... +#> ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... +#> ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... +#> ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... +#> ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... +#> ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 1225 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/mtcars.rds" +#> .. ..$ file_name: chr "mtcars.rds" +When using load_files(), you can specify files from multiple directories and customize the output list names by providing named arguments in the file_paths parameter. + +dv.loader::load_files( + file_paths = c( + "cars (rds)" = file.path(temp_dir, "cars.rds"), + "iris (sas)" = system.file("examples", "iris.sas7bdat", package = "haven") + ) +) |> names() +#> [1] "cars (rds)" "iris (sas)" - + diff --git a/articles/qc.html b/articles/qc.html index 1931284..db38cfb 100644 --- a/articles/qc.html +++ b/articles/qc.html @@ -26,7 +26,7 @@ dv.loader - 2.0.0 + 2.1.0 @@ -95,9 +95,9 @@ } -✅ dv.loader 2.0.0 +✅ dv.loader 2.1.0 -Date: 2024-Jul-08 05:22:29 +Date: 2024-Nov-28 10:11:06 The following document generates a report for R packages, to satisfy the criteria of a “Released” status under the Non-GxP project. The QC report contains the following information: @@ -126,39 +126,39 @@ Traceability matrix Summary - - + + Passed tests - - + + Failed tests - - + + Skipped tests - - + + Uncovered specifications - - + + Undeclared specifications This should always be empty, as non existant specs are controlled during test execution. 
- - + + @@ -174,7 +174,7 @@ Session Info and System Configura #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz Etc/UTC -#> date 2024-07-08 +#> date 2024-11-28 #> pandoc 2.9.2.1 @ /usr/bin/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── @@ -230,7 +230,7 @@ Session Info and System Configura #> xtable 1.8-4 2019-04-21 [2] RSPM #> yaml 2.3.8 2023-12-11 [2] RSPM #> -#> [1] /tmp/Rtmp5TSXGD/temp_libpath28a3f0273d8 +#> [1] /tmp/RtmpHTSToD/temp_libpath2d33ecd39da #> [2] /usr/local/lib/R/site-library #> [3] /usr/local/lib/R/library #> @@ -239,8 +239,8 @@ Session Info and System Configura List of specifications - - + + diff --git a/authors.html b/authors.html index be8d80e..b5f0703 100644 --- a/authors.html +++ b/authors.html @@ -10,7 +10,7 @@ dv.loader - 2.0.0 + 2.1.0 @@ -74,13 +74,13 @@ Citation Yang M, Brooks S, Voicu S (2024). dv.loader: Data loading module. -R package version 2.0.0. +R package version 2.1.0. @Manual{, title = {dv.loader: Data loading module}, author = {Ming Yang and Steven Brooks and Sorin Voicu}, year = {2024}, - note = {R package version 2.0.0}, + note = {R package version 2.1.0}, }
The dv.loader package simplifies the process of loading data files into R memory. It provides two main functions - load_data() and load_files() - that can handle two widely used data formats:
dv.loader
load_data()
load_files()
.rds
.sas7bdat
The package is designed to be flexible, allowing you to load data either from a centralized location using environment variables, or by specifying explicit file paths. Each loaded dataset includes metadata about the source file, such as its size, modification time, and location on disk.
To demonstrate the package’s capabilities, we’ll first create some example .rds files in a temporary directory that we can work with.
+# Create a temporary directory for the example data +temp_dir <- tempdir() + +# Save the cars and mtcars datasets to the temporary directory +saveRDS(cars, file = file.path(temp_dir, "cars.rds")) +saveRDS(mtcars, file = file.path(temp_dir, "mtcars.rds"))
# Create a temporary directory for the example data +temp_dir <- tempdir() + +# Save the cars and mtcars datasets to the temporary directory +saveRDS(cars, file = file.path(temp_dir, "cars.rds")) +saveRDS(mtcars, file = file.path(temp_dir, "mtcars.rds"))
To begin, we’ll need to load the dv.loader package.
library(dv.loader)
Note: use_wd = TRUE can be used to load data from a local folder. Just set your working directory with setwd() before calling load_data(), or use an explicit path in sub_dir.
use_wd = TRUE
setwd()
sub_dir
-test_data_path <- "../tests/testthat/inst/extdata" -data_list <- load_data( - sub_dir = test_data_path, - file_names = "dummyads2", - use_wd = TRUE -) - +The load_data() function requires the RXD_DATA environment variable to be set to the base directory containing your data files. This variable defines the root path from which subdirectories will be searched. +When you call load_data(), it searches the specified subdirectory for data files and returns them as a named list of data frames. Each data frame in the list is named after its source file. +For files that exist in both .rds and .sas7bdat formats, load_data() will load the .rds version by default since these are more compact and faster to read. You can override this behavior by setting prefer_sas = TRUE to prioritize loading .sas7bdat files instead. + +# Set the RXD_DATA environment variable to the temporary directory +Sys.setenv(RXD_DATA = temp_dir) -class(data_list) -#> [1] "list" +# Load the data files into a named list of data frames +data_list1 <- load_data( + sub_dir = ".", + file_names = c("cars", "mtcars") +) -class(data_list[["dummyads2"]]) -#> [1] "tbl_df" "tbl" "data.frame" +# Display the structure of the resulting list +str(data_list1) +#> List of 2 +#> $ cars :'data.frame': 50 obs. of 2 variables: +#> ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ... +#> ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 289 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./cars.rds" +#> .. ..$ file_name: chr "cars.rds" +#> $ mtcars:'data.frame': 32 obs. of 11 variables: +#> ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... 
+#> ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... +#> ..$ disp: num [1:32] 160 160 108 258 360 ... +#> ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... +#> ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... +#> ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... +#> ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... +#> ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... +#> ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... +#> ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... +#> ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 1225 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./mtcars.rds" +#> .. ..$ file_name: chr "mtcars.rds" +
test_data_path <- "../tests/testthat/inst/extdata" -data_list <- load_data( - sub_dir = test_data_path, - file_names = "dummyads2", - use_wd = TRUE -) - +The load_data() function requires the RXD_DATA environment variable to be set to the base directory containing your data files. This variable defines the root path from which subdirectories will be searched. +When you call load_data(), it searches the specified subdirectory for data files and returns them as a named list of data frames. Each data frame in the list is named after its source file. +For files that exist in both .rds and .sas7bdat formats, load_data() will load the .rds version by default since these are more compact and faster to read. You can override this behavior by setting prefer_sas = TRUE to prioritize loading .sas7bdat files instead. + +# Set the RXD_DATA environment variable to the temporary directory +Sys.setenv(RXD_DATA = temp_dir) -class(data_list) -#> [1] "list" +# Load the data files into a named list of data frames +data_list1 <- load_data( + sub_dir = ".", + file_names = c("cars", "mtcars") +) -class(data_list[["dummyads2"]]) -#> [1] "tbl_df" "tbl" "data.frame" +# Display the structure of the resulting list +str(data_list1) +#> List of 2 +#> $ cars :'data.frame': 50 obs. of 2 variables: +#> ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ... +#> ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 289 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./cars.rds" +#> .. ..$ file_name: chr "cars.rds" +#> $ mtcars:'data.frame': 32 obs. of 11 variables: +#> ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... 
+#> ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... +#> ..$ disp: num [1:32] 160 160 108 258 360 ... +#> ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... +#> ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... +#> ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... +#> ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... +#> ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... +#> ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... +#> ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... +#> ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 1225 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./mtcars.rds" +#> .. ..$ file_name: chr "mtcars.rds" +
The load_data() function requires the RXD_DATA environment variable to be set to the base directory containing your data files. This variable defines the root path from which subdirectories will be searched.
RXD_DATA
When you call load_data(), it searches the specified subdirectory for data files and returns them as a named list of data frames. Each data frame in the list is named after its source file.
For files that exist in both .rds and .sas7bdat formats, load_data() will load the .rds version by default, since .rds files are more compact and faster to read. You can override this behavior by setting prefer_sas = TRUE to prioritize loading .sas7bdat files instead.
prefer_sas = TRUE
+# Set the RXD_DATA environment variable to the temporary directory +Sys.setenv(RXD_DATA = temp_dir) -class(data_list) -#> [1] "list" +# Load the data files into a named list of data frames +data_list1 <- load_data( + sub_dir = ".", + file_names = c("cars", "mtcars") +) -class(data_list[["dummyads2"]]) -#> [1] "tbl_df" "tbl" "data.frame" +# Display the structure of the resulting list +str(data_list1) +#> List of 2 +#> $ cars :'data.frame': 50 obs. of 2 variables: +#> ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ... +#> ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 289 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./cars.rds" +#> .. ..$ file_name: chr "cars.rds" +#> $ mtcars:'data.frame': 32 obs. of 11 variables: +#> ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... +#> ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... +#> ..$ disp: num [1:32] 160 160 108 258 360 ... +#> ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... +#> ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... +#> ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... +#> ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... +#> ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... +#> ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... +#> ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... +#> ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 1225 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. 
..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./mtcars.rds" +#> .. ..$ file_name: chr "mtcars.rds"
# Set the RXD_DATA environment variable to the temporary directory +Sys.setenv(RXD_DATA = temp_dir) -class(data_list) -#> [1] "list" +# Load the data files into a named list of data frames +data_list1 <- load_data( + sub_dir = ".", + file_names = c("cars", "mtcars") +) -class(data_list[["dummyads2"]]) -#> [1] "tbl_df" "tbl" "data.frame" +# Display the structure of the resulting list +str(data_list1) +#> List of 2 +#> $ cars :'data.frame': 50 obs. of 2 variables: +#> ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ... +#> ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 289 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./cars.rds" +#> .. ..$ file_name: chr "cars.rds" +#> $ mtcars:'data.frame': 32 obs. of 11 variables: +#> ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... +#> ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... +#> ..$ disp: num [1:32] 160 160 108 258 360 ... +#> ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... +#> ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... +#> ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... +#> ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... +#> ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... +#> ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... +#> ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... +#> ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... +#> ..- attr(*, "meta")='data.frame': 1 obs. of 8 variables: +#> .. ..$ size : num 1225 +#> .. ..$ isdir : logi FALSE +#> .. ..$ mode : 'octmode' int 644 +#> .. ..$ mtime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ ctime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. 
..$ atime : POSIXct[1:1], format: "2024-11-28 10:11:03" +#> .. ..$ path : chr "/tmp/RtmpjC6516/./mtcars.rds" +#> .. ..$ file_name: chr "mtcars.rds"
The load_files() function accepts explicit file paths and loads them into a named list of data frames. Each data frame includes metadata as an attribute. If no custom names are provided, the function will use the file names (without paths or extensions) as the list names.
+# Load the data files into a named list of data frames +data_list2 <- load_files( + file_paths = c( + file.path(temp_dir, "cars.rds"), + file.path(temp_dir, "mtcars.rds") + ) +) -head(data_list[["dummyads2"]]) -#> STUDYID USUBJID SUBJID TRTFL PPROTFL TRT01PNDC TRT01PN -#> 1 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 2 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 3 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 4 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 5 test-0001 test-0001-102 102 Y Y Placebo 2000 -#> 6 test-0001 test-0001-102 102 Y Y Placebo 2000
# Load the data files into a named list of data frames +data_list2 <- load_files( + file_paths = c( + file.path(temp_dir, "cars.rds"), + file.path(temp_dir, "mtcars.rds") + ) +) -head(data_list[["dummyads2"]]) -#> STUDYID USUBJID SUBJID TRTFL PPROTFL TRT01PNDC TRT01PN -#> 1 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 2 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 3 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 4 test-0001 test-0001-101 101 Y Y BI 10mg 50100 -#> 5 test-0001 test-0001-102 102 Y Y Placebo 2000 -#> 6 test-0001 test-0001-102 102 Y Y Placebo 2000
Get the dataframe’s metadata through its attributes:
-attr(data_list[["dummyads2"]], "meta") -#> size isdir mode mtime ctime atime -#> 1 449 FALSE 644 2024-07-08 05:22:03 2024-07-08 05:22:03 2024-07-08 05:22:03 -#> path -#> 1 /__w/dv.loader/dv.loader/vignettes/../tests/testthat/inst/extdata/dummyads2.RDS -#> file_name -#> 1 dummyads2
attr(data_list[["dummyads2"]], "meta") -#> size isdir mode mtime ctime atime -#> 1 449 FALSE 644 2024-07-08 05:22:03 2024-07-08 05:22:03 2024-07-08 05:22:03 -#> path -#> 1 /__w/dv.loader/dv.loader/vignettes/../tests/testthat/inst/extdata/dummyads2.RDS -#> file_name -#> 1 dummyads2
When using load_files(), you can specify files from multiple directories and customize the output list names by passing a named character vector to the file_paths parameter.
file_paths
+dv.loader::load_files( + file_paths = c( + "cars (rds)" = file.path(temp_dir, "cars.rds"), + "iris (sas)" = system.file("examples", "iris.sas7bdat", package = "haven") + ) +) |> names() +#> [1] "cars (rds)" "iris (sas)"
dv.loader::load_files( + file_paths = c( + "cars (rds)" = file.path(temp_dir, "cars.rds"), + "iris (sas)" = system.file("examples", "iris.sas7bdat", package = "haven") + ) +) |> names() +#> [1] "cars (rds)" "iris (sas)"
Date: 2024-Jul-08 05:22:29
Date: 2024-Nov-28 10:11:06
The following document generates a report for R packages, to satisfy the criteria of a “Released” status under the Non-GxP project. The QC report contains the following information:
This should always be empty, as nonexistent specs are controlled during test execution.
Yang M, Brooks S, Voicu S (2024). dv.loader: Data loading module. -R package version 2.0.0. +R package version 2.1.0.
@Manual{, title = {dv.loader: Data loading module}, author = {Ming Yang and Steven Brooks and Sorin Voicu}, year = {2024}, - note = {R package version 2.0.0}, + note = {R package version 2.1.0}, }