epiverse-trace · CarmenTamayo · Jun 19, 2024 · Jun 19, 2024 · Jun 19, 2024 · joshwlambert
diff --git a/R/cfr_ci.R b/R/cfr_ci.R
@@ -0,0 +1,15 @@
+# Function to estimate mean CFR with 95% CI from a numeric vector
+cfr_ci <- function(x) {
+  mean <- mean(x)
+  sd <- sd(x)
+  sem <- sd/ sqrt(length(x))
+  margin_of_error <- sem * 1.96
+  lower <- round(mean - margin_of_error, 2)
+  upper <- round(mean + margin_of_error, 2)
+  result <- list(
+    mean = round(mean, 2),
+    lower_ci = lower,
+    upper_ci = upper
+  )
+  return(result)
+}
diff --git a/R/convert_to_incidence.R b/R/convert_to_incidence.R
@@ -0,0 +1,17 @@
+# Function to convert to incidence
+convert_to_incidence <- function(sim) {
+  incidence_data <- incidence2::incidence(sim,
+                                          date_index = c("date_onset", "date_death", "date_recovery"),
+                                          interval = 1L) %>%
+    complete_dates() %>%
+    pivot_wider(names_from = count_variable, values_from = count)
+  #Vector with names for renaming
+  rename_vec <- c("date_index" = "date", "date_onset" = "cases", "date_death" = "deaths" , "date_recovery" = "recovered")
+  # Rename columns conditionally
+  for (old_name in names(rename_vec)) {
+    if (old_name %in% colnames(incidence_data)) {
+      colnames(incidence_data)[colnames(incidence_data) == old_name] <- rename_vec[[old_name]]
+    }
+  }
+  return(incidence_data)
+}
diff --git a/R/estimate_cfr.R b/R/estimate_cfr.R
@@ -0,0 +1,8 @@
+# Function to calculate cfr by specifying columns from a dataframe with
+# deaths and cases
+estimate_cfr <- function(df, numerator, denominator) {
+  total_deaths <- sum(df[[numerator]])
+  total_cases <- sum(df[[denominator]])
+  cfr <- total_deaths/total_cases*100
+  return(cfr)
+}
diff --git a/R/id_outbreak_peak.R b/R/id_outbreak_peak.R
@@ -0,0 +1,14 @@
+# Function to estimate the peak of the outbreak using incidence2
+# Incidence object from linelist has 3 count_variables, but we want the column
+# date_onset only to be considered to estimate the peak
+id_outbreak_peak <- function(linelist) {
+  inc_data <- linelist %>%
+    incidence2::incidence(date_index = "date_onset", interval = 1L) %>%
+    complete_dates()
+  peak <- inc_data %>%
+    estimate_peak(first_only = TRUE) %>%
+    pull(observed_peak) %>%
+    as.character()
+  return(peak)
+
+}
diff --git a/R/id_outbreak_phases_new.R b/R/id_outbreak_phases_new.R
@@ -0,0 +1,25 @@
+# Function that takes the outbreak's peak (identified previously by id_outbreak_peak),
+# pastes this on a new column
+
+id_outbreak_phases_new <- function(inc_data, outbreak_peaks){
+  inc_data$date <- as.Date(inc_data$date)
+  # Id start date
+  start_date <- min(inc_data$date)
+  # Select a possible date for the growing phase
+  possible_growing_dates <- inc_data %>%
+    filter(date > start_date + 10 & date < outbreak_peaks) %>%
+    pull(date)
+  # Select one of these dates at random
+  growing_date <- sample(possible_growing_dates, 1)
+  # Add new column with outbreak dates
+  inc_data <- inc_data %>%
+    mutate(outbreak_dates = case_when(
+      date == outbreak_peaks & !duplicated(date) ~ "peak",
+      cases >= 5 & row_number() == min(which(cases >= 5))  ~ "start",
+      date == max(date) & !duplicated(date) ~ "end",
+      date == growing_date ~ "growing",
+      TRUE ~ NA_character_
+    )
+    )
+  return(inc_data)
+}
diff --git a/R/id_outbreak_phases_old.R b/R/id_outbreak_phases_old.R
@@ -0,0 +1,18 @@
+#Function to smooth outbreak trend and create a column that indicates the outbreak's
+# peak (mean of window with highest no. of cases), start, and end
+id_outbreak_phases_old <- function(inc_data) {
+  # setting window size
+  window_size <- 7
+  trend <- inc_data %>%
+    mutate(cases_ma = rollmean(cases, k = window_size, fill = NA, align = "center")) %>%
+    mutate(date = as.Date(date)) %>%
+    mutate(cases_ma = replace_na(cases_ma, 0)) %>%
+    mutate(outbreak_dates = case_when(
+      row_number() == which.max(cases_ma) ~ "peak",
+      row_number() == which.min(if_else(cases_ma > 1, row_number(), Inf)) ~ "start",
+      date == max(date) ~ "end",
+      TRUE ~ NA_character_
+    ))
+}
+
+
diff --git a/R/run_ebola_simulations.R b/R/run_ebola_simulations.R
@@ -0,0 +1,27 @@
+# Function to run multiple instances of sim_linelist
+run_ebola_simulations <- function(sim_count) {
+  # Initialize empty list to store simulation results
+  results <- vector("list", length = sim_count)
+
+  # Loop through simulations
+  for (i in 1:sim_count) {
+    # Set seed for reproducibility
+    set.seed(i)  # Use loop counter for unique seeds
+
+    # Run the simulation code and store the result
+    results[[i]] <- sim_linelist(
+      contact_distribution = contact_distribution,
+      infectious_period = ip_ebola_epidist,
+      onset_to_hosp = NA,
+      hosp_risk = NA,
+      hosp_death_risk = NA,
+      onset_to_death = o_d_ebola_epidist,
+      onset_to_recovery = o_r_ebola_epidist,
+      prob_infect = 0.7,
+      outbreak_size = c(500, 2000),
+      non_hosp_death_risk = 0.3
+    )
+  }
+  # Return the list of simulation results
+  return(results)
+}
diff --git a/R/subset_outbreak_phase.R b/R/subset_outbreak_phase.R
@@ -0,0 +1,14 @@
+# Function to select a random date within an outbreak phase and create a subset
+# of data between relevant dates of the outbreak
+subset_outbreak_phase <- function(data, from, to) {
+  # Sample a random date between "from" and "to" statuses
+  sampled_date <- data %>%
+    slice(sample(seq(which(outbreak_dates == from), which(outbreak_dates == to)), 1)) %>%
+    pull(date) %>%
+    as.character()
+
+  # Subset data based on the sampled date
+  subset_data <- data[data$date <= sampled_date, ]
+
+  return(subset_data)
+}
diff --git a/R/tidy_sim_results.R b/R/tidy_sim_results.R
@@ -0,0 +1,16 @@
+# Function to process simulation results
+tidy_sim_results <- function(sim_data) {
+  # Converting class to character so that it's possible to use ifelse
+  sim_data$date_outcome <- as.character(sim_data$date_outcome)
+  # Loop through each individual and update columns
+  for (i in 1:nrow(sim_data)) {
+    sim_data$date_death[i] <- ifelse(sim_data$outcome[i] == "died", sim_data$date_outcome[i], NA)
+    sim_data$date_recovery[i] <- ifelse(sim_data$outcome[i] == "recovered", sim_data$date_outcome[i], NA)
+  }
+  # Convert date columns to Date class
+  sim_data$date_death <- as.Date(sim_data$date_death, format = "%Y-%m-%d")
+  sim_data$date_recovery <- as.Date(sim_data$date_recovery, format = "%Y-%m-%d")
+
+  return(sim_data)
+}
+