Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version: 0.0.1.9010 #81

Merged
merged 8 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: metasurvey
Title: Survey Processing with Meta-Programming
Version: 0.0.1.9008
Version: 0.0.1.9010
URL: https://github.com/metasurveyr/metasurvey
Authors@R:
c(
Expand Down Expand Up @@ -44,7 +44,8 @@ Suggests:
knitr (>= 1.33),
foreign (>= 0.8-81),
rmarkdown (>= 2.11),
parallel (>= 4.1.1)
parallel (>= 4.1.1),
rio (>= 0.5.27)
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(get_recipe)
export(get_steps)
export(group_dates)
export(lazy_default)
export(load_panel_survey)
export(load_survey)
export(load_survey_example)
export(read_recipe)
Expand Down
10 changes: 3 additions & 7 deletions R/PanelSurvey.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,9 @@ extract_surveys <- function(RotativePanelSurvey, index = NULL, monthly = NULL, a

if (!is.null(annual)) {
results$annual <- list()
if (RotativePanelSurvey$implantation$periodicity != "Annual") {
for (year in annual) {
indices <- apply_interval(ts_series, year, 1, year, 12)
results$annual[[as.character(year)]] <- follow_up[indices]
}
} else {
results$annual[["implantation"]] <- list(RotativePanelSurvey$implantation)
for (year in annual) {
indices <- apply_interval(ts_series, year, 1, year, 12)
results$annual[[as.character(year)]] <- follow_up[indices]
}
}

Expand Down
179 changes: 158 additions & 21 deletions R/load_survey.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
#' @examples
#' set_engine("data.table")
#' svy_example <- load_survey(
#' "https://raw.githubusercontent.com/metasurveyr/metasurvey_data/main/eaii/2019-2021.csv",
#' load_survey_example(
#' svy_type = "eaii",
#' svy_edition = "2019-2021"
#' ),
#' svy_type = "eaii",
#' svy_edition = "2019-2021",
#' svy_edition = "eaii_2019-2021",
#' svy_weight = add_weight(annual = "w_trans"),
#' dec = ","
#' )
Expand Down Expand Up @@ -82,6 +85,7 @@ load_survey <- function(
#' @param svy_weight_follow_up Weight of the follow-up survey
#' @keywords preprocessing
#' @return RotativePanelSurvey object
#' @export

load_panel_survey <- function(
path_implantation,
Expand All @@ -92,33 +96,144 @@ load_panel_survey <- function(
names_survey <- gsub(
"\\..*",
"",
list.files(path_follow_up, full.names = FALSE)
list.files(path_follow_up, full.names = FALSE, pattern = ".csv")
)

path_survey <- list.files(path_follow_up, full.names = TRUE)
if (length(names(svy_weight_follow_up)) > 1) {
stop(
"The follow-up survey must have a single weight time pattern"
)
}

time_pattern_follow_up <- names(svy_weight_follow_up)

if (is(svy_weight_follow_up[[1]], "list")) {
svy_weight_follow_up <- svy_weight_follow_up[[1]]
}

path_survey <- list.files(path_follow_up, full.names = TRUE, pattern = ".csv")



names(path_survey) <- names_survey


implantation <- load_survey(
path_implantation,
svy_type = svy_type,
svy_edition = "2023",
svy_edition = basename(path_implantation),
svy_weight = svy_weight_implantation
)

follow_up <- lapply(
X = names(path_survey),
FUN = function(x) {
load_survey(
path_survey[[x]],
svy_type = svy_type,
svy_edition = x,
svy_weight = svy_weight_follow_up
)
if (!is.null(svy_weight_follow_up$replicate_path)) {
path_file <- svy_weight_follow_up$replicate_path
path_file_final <- c()

for (i in path_file) {
if (file.info(i)$isdir) {
path_file_final <- c(path_file_final, list.files(i, full.names = TRUE, pattern = ".rds"))
} else {
path_file_final <- c(path_file_final, i)
}
}
)

names(follow_up) <- names_survey
names_year_month <- sapply(
X = basename(path_file_final),
FUN = function(x) {
time_pattern <- extract_time_pattern(x)
if (time_pattern$periodicity != "Monthly") {
stop(
message(
"The periodicity of the file is not monthly"
)
)
} else {
return(
time_pattern$year * 100 + time_pattern$month
)
}
},
USE.NAMES = FALSE
)

names(path_file_final) <- names_year_month


svy_weight_follow_up <- lapply(
X = as.character(names_year_month),
FUN = function(x) {
replicate <- list(
add_replicate(
"W",
replicate_path = unname(path_file_final[x]),
replicate_id = c("ID" = "ID"),
replicate_pattern = "wr[0-9]+",
replicate_type = "bootstrap"
)
)

names(replicate) <- time_pattern_follow_up

return(replicate)
}
)

names(svy_weight_follow_up) <- names_year_month

names_path_survey_year_month <- sapply(
X = names(path_survey),
FUN = function(x) {
time_pattern <- extract_time_pattern(x)
if (time_pattern$periodicity != "Monthly") {
stop(
message(
"The periodicity of the file is not monthly"
)
)
} else {
return(
time_pattern$year * 100 + time_pattern$month
)
}
},
USE.NAMES = FALSE
)

names(path_survey) <- names_path_survey_year_month

follow_up <- lapply(
X = 1:length(path_survey),
FUN = function(x) {
y <- path_survey[[x]]
z <- names(path_survey)[x]
svy_weight <- unname(svy_weight_follow_up[z])[[1]]


load_survey(
y,
svy_type = svy_type,
svy_edition = basename(y),
svy_weight = svy_weight
)
}
)

names(follow_up) <- names_survey
} else {
follow_up <- lapply(
X = names(path_survey),
FUN = function(x) {
load_survey(
path_survey[[x]],
svy_type = svy_type,
svy_edition = x,
svy_weight = svy_weight_follow_up
)
}
)

names(follow_up) <- names_survey
}

return(
RotativePanelSurvey$new(
Expand All @@ -143,19 +258,41 @@ load_panel_survey <- function(

read_file <- function(file, .args = NULL) {
.extension <- gsub(".*\\.", "", file)
.file_name <- basename(file)
.file_name <- gsub("\\..*", "", .file_name)
.path_without_extension <- gsub("\\..*", "", file)
.output_file <- paste0(.path_without_extension, ".csv")



if (.extension != ".csv" && !file.exists(.output_file)) {
requireNamespace("rio", quietly = TRUE)

rio::convert(
in_file = file,
out_file = .output_file
)
.extension <- "csv"
} else {
.extension <- "csv"
}

.read_function <- switch(.extension,
sav = list(package = "foreign", read_function = "read.spss"),
dta = list(package = "foreign", read_function = "read.dta"),
csv = list(package = "data.table", read_function = "fread"),
xlsx = list(package = "openxlsx", read_function = "read.xlsx"),
rds = list(package = "base", read_function = "readRDS"),
stop("Unsupported file type: ", .extension)
)

require(.read_function$package, character.only = TRUE)

if (is.null(.args)) {
.args <- list(file)
.args <- list(.output_file)
names(.args) <- names(formals(.read_function$read_function)[1])
} else {
.args$file <- .output_file
}

.names_args <- names(.args)
Expand All @@ -164,7 +301,8 @@ read_file <- function(file, .args = NULL) {

.names_args <- .names_args[!.names_args %in% .metadata_args]

do.call(.read_function$read_function, args = .args[.names_args])
df <- do.call(.read_function$read_function, args = .args[.names_args])
return(data.table::data.table(df))
}


Expand Down Expand Up @@ -224,8 +362,6 @@ load_survey.data.table <- function(...) {
}




Survey <- Survey$new(
data = svy,
edition = .args$svy_edition,
Expand All @@ -236,7 +372,8 @@ load_survey.data.table <- function(...) {
recipes = .args$recipes %||% NULL
)

print(.args$bake)


if (.args$bake %||% FALSE) {
return(bake_recipes(Survey))
} else {
Expand Down
4 changes: 0 additions & 4 deletions R/steps.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,6 @@ step_compute_survey <- function(svy, ..., .by = NULL, use_copy = use_copy_defaul
not_in_data <- !(.new_vars %in% names_vars)
.new_vars <- .new_vars[not_in_data]

if (length(.new_vars) == 0) {
stop("No new variable created")
}

step <- Step$new(
name = paste("New variable:", paste(.new_vars, collapse = ", ")),
edition = get_edition(svy),
Expand Down
33 changes: 27 additions & 6 deletions R/survey.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@ Survey <- R6Class(
recipes = list(),
workflows = list(),
design = NULL,
initialize = function(data, edition, type, psu, engine, weight, design = NULL, steps = NULL, recipes = NULL) {
initialize = function(data, edition, type, psu, engine, weight, design = NULL, steps = NULL, recipes = list()) {
self$data <- data

time_pattern <- validate_time_pattern(
svy_type = type,
svy_edition = edition
)


weight_list <- validate_weight_time_pattern(data, weight)

design_list <- lapply(
Expand All @@ -38,13 +39,28 @@ Survey <- R6Class(
calibrate.formula = ~1
)
} else {
survey::svrepdesign(
aux_vars <- c(x$weight, x$replicate_id)
data_aux <- data[, aux_vars, with = FALSE]
data_aux <- merge(
x$replicate_file[, 1:11],
data_aux,
by.x = names(x$replicate_id),
by.y = x$replicate_id
)

design <- survey::svrepdesign(
id = psu,
weights = as.formula(paste("~", x$weight)),
data = merge(data, x$replicate_file, by.x = names(x$replicate_id), by.y = x$replicate_id),
data = data_aux,
repweights = x$replicate_pattern,
type = x$replicate_type
)


data <- merge(data, x$replicate_file, by.x = names(x$replicate_id), by.y = x$replicate_id)
design$variables <- data
design$repweights <- x$replicate_file
return(design)
}
}
)
Expand All @@ -56,7 +72,7 @@ Survey <- R6Class(
self$default_engine <- engine
self$weight <- weight_list
self$design <- design_list
self$recipes <- list(recipes)
self$recipes <- if (is.null(recipes)) list() else list(recipes)
self$workflows <- list()
self$periodicity <- time_pattern$svy_periodicity
},
Expand Down Expand Up @@ -94,6 +110,12 @@ Survey <- R6Class(
self$update_design()
},
add_recipe = function(recipe, bake = lazy_default()) {
if ((self$edition != recipe$edition)) {
stop("Invalid Recipe: \n", recipe$name, "\nEdition of survey: ", self$edition, "\nEdition of recipe: ", recipe$edition)
}



index_recipe <- length(self$recipes) + 1
self$recipes[[index_recipe]] <- recipe
},
Expand Down Expand Up @@ -147,7 +169,6 @@ Survey <- R6Class(
)
} else {
survey::svrepdesign(
id = ~1,
weights = as.formula(paste("~", x$weight)),
data = merge(self$data, x$replicate_file, by.x = names(x$replicate_id), by.y = x$replicate_id),
repweights = x$replicate_pattern,
Expand Down Expand Up @@ -563,7 +584,7 @@ cat_design_type <- function(self, design_name) {
#'

cat_recipes <- function(self) {
if (is.null(self$recipes)) {
if (is.null(self$recipes) || length(self$recipes) == 0) {
return("None")
}

Expand Down
Loading
Loading