From 080ce01ead34e341fedb9b9cfd1427bd08259f89 Mon Sep 17 00:00:00 2001 From: Emma Glennon Date: Sat, 21 Dec 2019 11:18:20 +0200 Subject: [PATCH 1/2] delete old alerts scripts --- .../report_sources/alerts_beni_2019-11-01.Rmd | 2458 ----------------- .../alerts_bukavu_2019-11-01.Rmd | 2458 ----------------- .../alerts_bunia_2019-11-01.Rmd | 2458 ----------------- .../alerts_butembo_2019-11-01.Rmd | 2458 ----------------- .../report_sources/alerts_goma_2019-11-01.Rmd | 2458 ----------------- .../alerts_magina_2019-11-01 .Rmd | 2458 ----------------- .../alerts_mambasa_2019-11-01.Rmd | 2458 ----------------- 7 files changed, 17206 deletions(-) delete mode 100644 alerts/report_sources/alerts_beni_2019-11-01.Rmd delete mode 100644 alerts/report_sources/alerts_bukavu_2019-11-01.Rmd delete mode 100644 alerts/report_sources/alerts_bunia_2019-11-01.Rmd delete mode 100644 alerts/report_sources/alerts_butembo_2019-11-01.Rmd delete mode 100644 alerts/report_sources/alerts_goma_2019-11-01.Rmd delete mode 100644 alerts/report_sources/alerts_magina_2019-11-01 .Rmd delete mode 100644 alerts/report_sources/alerts_mambasa_2019-11-01.Rmd diff --git a/alerts/report_sources/alerts_beni_2019-11-01.Rmd b/alerts/report_sources/alerts_beni_2019-11-01.Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_beni_2019-11-01.Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` diff --git a/alerts/report_sources/alerts_bukavu_2019-11-01.Rmd b/alerts/report_sources/alerts_bukavu_2019-11-01.Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_bukavu_2019-11-01.Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` diff --git a/alerts/report_sources/alerts_bunia_2019-11-01.Rmd b/alerts/report_sources/alerts_bunia_2019-11-01.Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_bunia_2019-11-01.Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` diff --git a/alerts/report_sources/alerts_butembo_2019-11-01.Rmd b/alerts/report_sources/alerts_butembo_2019-11-01.Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_butembo_2019-11-01.Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` diff --git a/alerts/report_sources/alerts_goma_2019-11-01.Rmd b/alerts/report_sources/alerts_goma_2019-11-01.Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_goma_2019-11-01.Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` diff --git a/alerts/report_sources/alerts_magina_2019-11-01 .Rmd b/alerts/report_sources/alerts_magina_2019-11-01 .Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_magina_2019-11-01 .Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` diff --git a/alerts/report_sources/alerts_mambasa_2019-11-01.Rmd b/alerts/report_sources/alerts_mambasa_2019-11-01.Rmd deleted file mode 100644 index 62be678..0000000 --- a/alerts/report_sources/alerts_mambasa_2019-11-01.Rmd +++ /dev/null @@ -1,2458 +0,0 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, and Amy Gimma for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 20 -x_recent <- filter(x, date >= start_date) -outcomes_recent <- filter(outcomes, date >= start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "vrai positif", - true_negative = "vrai négatif", - false_positive = "faux positif", - false_negative = "faux négatif")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origin de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "le sous-coordination de Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` From 3dd4b351f547f9333992e27baedb7bf14f386c46 Mon Sep 17 00:00:00 2001 From: Emma Glennon Date: Tue, 31 Dec 2019 12:57:12 +0200 Subject: [PATCH 2/2] fix bug in outcomes table Outcomes tables previously only produced if at least one of each case outcome (suspect, non_cas, confirme) has occurred in last three weeks. Updated reports will now always compile given sparser data. (Compilation confirmed by Richy and Emma) --- ...9-12-10.Rmd => alerts_beni_2019-12-31.Rmd} | 5726 +++++++++-------- ...-12-10.Rmd => alerts_bunia_2019-12-31.Rmd} | 5326 +++++++-------- ...2-10.Rmd => alerts_butembo_2019-12-31.Rmd} | 5702 ++++++++-------- ...9-12-10.Rmd => alerts_goma_2019-12-31.Rmd} | 5704 ++++++++-------- ...2-10.Rmd => alerts_mambasa_2019-12-31.Rmd} | 5682 ++++++++-------- ...2-10.Rmd => alerts_mangina_2019-12-31.Rmd} | 5714 ++++++++-------- 6 files changed, 16997 insertions(+), 16857 deletions(-) rename alerts/report_sources/{alerts_beni_2019-12-10.Rmd => alerts_beni_2019-12-31.Rmd} (96%) rename alerts/report_sources/{alerts_bunia_2019-12-10.Rmd => alerts_bunia_2019-12-31.Rmd} (96%) rename alerts/report_sources/{alerts_butembo_2019-12-10.Rmd => alerts_butembo_2019-12-31.Rmd} (96%) rename alerts/report_sources/{alerts_goma_2019-12-10.Rmd => alerts_goma_2019-12-31.Rmd} (96%) rename alerts/report_sources/{alerts_mambasa_2019-12-10.Rmd => alerts_mambasa_2019-12-31.Rmd} (96%) rename alerts/report_sources/{alerts_mangina_2019-12-10.Rmd => alerts_mangina_2019-12-31.Rmd} (96%) diff --git a/alerts/report_sources/alerts_beni_2019-12-10.Rmd b/alerts/report_sources/alerts_beni_2019-12-31.Rmd similarity index 96% rename from alerts/report_sources/alerts_beni_2019-12-10.Rmd rename to alerts/report_sources/alerts_beni_2019-12-31.Rmd index 56a9cad..03a339e 100644 --- a/alerts/report_sources/alerts_beni_2019-12-10.Rmd +++ b/alerts/report_sources/alerts_beni_2019-12-31.Rmd @@ -1,2855 +1,2871 @@ ---- -title: "Investigation of alerts data: Beni" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma and Aminata Ndiaye for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Beni. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_beni -x_raw <- custom_import(current_beni) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_beni) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epidemiologique) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_de_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = moyen_de_transmission) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% - mutate(status = conclusion_finale_de_l_investigation) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Beni present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. - - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement"), - contains("sang"), - contains("hemorag") - ) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) - - -#create a variable for the final status of the alert -x <- x %>% - mutate(final_outcome = classification_finale_du_cas) %>% - mutate(final_outcome = factor(final_outcome, levels = c("confirme", - "non_cas", - "suspect"))) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -beni_ha_pop <- pop_data$population -index <- which(is.na(beni_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausees_vomissement, - diarrhees, - fatigue_intense, - perte_d_appetit_anorexie, - douleur_abdminales, - douleur_thoraciques, - douleur_musculaires, - douleurs_articulaires, - cephalees, - toux, - difficulte_respirer, - difficulte_a_avaler, - mal_a_la_gorge, - ictere_conjonctives_gencives_peau, - conjonctivite_oeil_rouge, - eruption_cutanees, - hoquet, - douleurs_retro_orbitaires_photophobie, - coma_perte_de_conscience, - confusion_ou_desorientation, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausees_vomissement, - diarrhees, - fatigue_intense, - perte_d_appetit_anorexie, - douleur_abdminales, - douleur_thoraciques, - douleur_musculaires, - douleurs_articulaires, - cephalees, - toux, - difficulte_respirer, - difficulte_a_avaler, - mal_a_la_gorge, - ictere_conjonctives_gencives_peau, - conjonctivite_oeil_rouge, - eruption_cutanees, - hoquet, - douleurs_retro_orbitaires_photophobie, - coma_perte_de_conscience, - confusion_ou_desorientation, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 21 -x_recent <- filter(x, date > start_date) -outcomes_recent <- filter(outcomes, date > start_date) - -``` - - -## Alerts counts split - -Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. - -```{r alert_count_split} - -greater_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 100) %>% - pull(aire_de_sante) - -less_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 100) %>% - pull(aire_de_sante) - -less_100[less_100 %in% greater_100] <- NA -less_100 <- less_100[complete.cases(less_100)] - -greater_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 20) %>% - pull(aire_de_sante) -less_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 20) %>% - pull(aire_de_sante) - -less_20_recent[less_20_recent %in% greater_20_recent] <- NA -less_20_recent <- less_20_recent[complete.cases(less_20_recent)] - -``` - - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "Validation correcte", - true_negative = "Invalidation correcte", - false_positive = "Fausse alerte", - false_negative = "Alerte manquée")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - - -scale_final_outcome <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(confirme = "#ff9999", - suspect = "#264d73", - non_cas = "#8cb3d9"), - labels = c(confirme = "Cas confirme", - suspect= "Cas suspect", - non_cas = "Non-cas" - - )) - -scale_final_outcome_perc <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(perc_confirme = "#ff9999", - perc_suspect = "#264d73", - perc_non_cas = "#8cb3d9"), - labels = c(perc_confirme = "Cas confirme", - perc_suspect= "Cas suspect", - perc_non_cas = "Non-cas" - - )) - - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Beni")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé - Beni") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table - -```{r health_area_total_greater100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Beni", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - - - -``` - -```{r health_area_total_less100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100, n >= 5) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Beni", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table - -```{r health_area_total_recent_greater20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " (avec plus de 20 alertes) - Beni"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_recent_less20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " (avec moins de 20 alertes) - Beni "), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x_recent %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x_recent %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x_recent %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine - Beni") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine - Beni", -subtitle = "Données des trois dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé - Beni") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - "et zone de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine ", - "et zone de sante - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_origins_greater100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Beni", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_origins_less100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Beni", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - " et zone de sante - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -``` {r health_area_total_recent_origin_greater20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé (avec plus de 20 alertes) - Beni"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_recent_origin_less20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé (avec moins de 20 alertes) - Beni"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x_recent %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x_recent %>% - filter(top_aires != "other") %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - " et aire de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x_recent %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation - Beni") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - labs(title = paste("Nombre d'alertes par décision de validation - Beni"), -subtitle = "Données des trois dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé - Beni") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_decisionsgreater100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Beni", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_decisionsless100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Beni", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", - legend.title = element_text(size = 12), - legend.text = element_text(size = 10)) + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -```{r health_area_total_recent_decision_greater20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé (avec plus de 20 alertes) - Beni"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_recent_decision_less20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé (avec moins de 20 alertes) - Beni"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes_recent %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes_recent %>% - filter(top_aires != "other") - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - cat("There is no population data therefore alert rates can not be calculated.") -} - - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé - Beni")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x_recent %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x_recent %>% - filter( - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut - Beni"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Final status of alerts {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section investigates information on the proportions of false positive and false negative that became real cases. - - - -## Overall final status by status decision comparison - -###Absolute number of alerts by final status and status decision comparison - -```{r final_status} - -outcomes %>% - filter(!is.na(final_outcome)) %>% -ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_x", space = "free")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -###Percentage of final status by alert status decision - -```{r final_status_percentage} - -perc_final_outcome <- outcomes %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_non_cas = prop_to_perc(non_cas/total), - perc_suspect = prop_to_perc(suspect/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - -perc_final_outcome %>% -ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -##Final status for the past 21 days - -###Absolute number of alerts by final status and status decision comparison for the past 21 days - -```{r final_status_recent} - - -outcomes_recent %>% - filter(!is.na(final_outcome)) %>% -ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -###Percentage of final status by alert status decision for the past 21 days - -```{r final_status_percentage_recent} - -perc_final_outcome_recent <- outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_non_cas = prop_to_perc(non_cas/total), - perc_suspect = prop_to_perc(suspect/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - - -perc_final_outcome_recent %>% -ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -### Table - final status overall - -```{r table_final_status, fig.keep = "all"} - -perc_final_outcome %>% - show_table() - -``` - - - -### Table - final status for the past 21 days - -```{r table_final_status_recent, fig.keep = "all"} - -perc_final_outcome_recent %>% - show_table() - -``` - - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_beni), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_beni <- x - -to_export <- c("cleaned_alerts_database_beni", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "perc_final_outcome", - "perc_final_outcome_recent", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_xlsx")) { - dir.create("produced_xlsx") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_beni), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` +--- +title: "Investigation of alerts data: Beni" +author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, Aminata Ndiaye, and Emma Glennon for the analytic cell OEC Goma" +date: "`r format(Sys.time(), '%A %d %B %Y')`" +output: + html_document: + code_folding: hide + highlight: zenburn + number_sections: yes + theme: spacelab + toc: yes + toc_collapse: no + toc_depth: 1 + toc_float: yes + css: !expr here::here('css', 'style.css') +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(echo = TRUE, + eval = TRUE, + collapse = TRUE, + fig.width = 8, + fig.height = 6, + dpi = 150, + warning = FALSE, + message = FALSE, + fig.path = "figures/") +``` + + +
+ +**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) + +**Code contributors:** Chris Jarvis, Charlie Whittaker + +**Data contributors:** Surveillance team + +**Version:** 1.0.0 + +**Reviewed by:** Thibaut Jombart + +**Notice**: this is a **stable, routine report**. **Do not touch it unless it is +broken.** To make a contribution, carefully read the +[README](../../../../../README.html) file. + + + + + + + + +# Data preparation {.tabset .tabset-fade .tabset-pills} + + + +## Outline + +This report cleans and analyses the alert data of Beni. Input comes from an +`xlsx` file containing alerts in a specific format. Because all +sub-coordinations have different standards, each sub-coordination needs a +separate report. + +The data preparation involves the following steps, detailed in the following +tabs: + +* **Load scripts**: loads libraries and useful scripts used in the analyses; all + `.R` files contained in `scripts` at the root of the factory are automatically + loaded + +* **Load data**: imports datasets, and may contain some *ad hoc* changes to the +data such as specific data cleaning (not used in other reports), new variables +used in the analyses, etc. + +* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is + not used in other reports (otherwise cleaning should be done in a dedicated + report); this section is also used to create new variables used in the + analyses + +## Load scripts + +These scripts will load: + +* all local scripts, stored as `.R` filesinside `/scripts/` +* all Overall scripts, i.e. stored outside the factory in `../scripts/` + +```{r read_scripts} + +## read scripts +path_to_scripts <- here::here("scripts") +scripts_files <- dir(path_to_scripts, pattern = ".R$", + full.names = TRUE) +for (file in scripts_files) source(file, local = TRUE) + +ggthemr("fresh") + +``` + + + +## Load alerts data + +We extract the completion date from the file name: + +```{r load_alerts_data} + +## load the data +current_beni +x_raw <- custom_import(current_beni) +glimpse(x_raw) + +## extract database date from the file name +file_name <- gsub("^[^.]+/", "", current_beni) +database_date <- file_name %>% + guess_dates() +database_date + +``` + +The **completion date** of the database is **`r format(database_date, format = +"%A %d %b %Y")`**. + + +## Clean data + +We use *linelist*'s function `clean_data()` to: + +- remove all special characters from the data +- set all characters to lower case +- replace all accentuated and diacritic characters with their closest ascii + match in the latin alphabet +- replace all separators with a single `_` +- replace all mis-spelling using a Overall dictionary (see the file + `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R + session, after compiling the document +- (optionally) detect date formats and convert data to dates, including cases + where format varies within a column + + +This cleaning is achieved with: + +```{r data_cleaning} + +x <- x_raw %>% + clean_data(guess_dates = FALSE, + wordlists = cleaning_rules) %>% + as_tibble() + +``` + + +## Renaming variables and sanity checks + +The following variables will be used, and are therefore checked: + +- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` +- sanitize `genre` +- make `origin` as a sanitized version of `source_notif` + +```{r check_variables} + +## date of notification +x <- x %>% mutate(date = guess_dates(date)) +range(x$date, na.rm = TRUE) + +# REVIEW: Remove NA dates +date_na <- sum(is.na(x$date)) +x <- x %>% filter(!is.na(date)) +# Check that na dates are removed + +## gender +table(x$sexe, useNA = "ifany") + +## contact connu +x <- mutate(x, contact_connu = lien_epidemiologique) +table(x$contact_connu, useNA = "ifany") + +## zone de sante +x <- mutate(x, zone_de_sante = as.character(zone_de_sante)) +table(x$zone_de_sante, useNA = "ifany") + +## origin / source_notif +x <- mutate(x, origin = moyen_de_transmission) +table(x$origin, useNA = "ifany") + +# create variable for alert validation +x <- x %>% + mutate(status = conclusion_finale_de_l_investigation) %>% + mutate(status = factor(status, levels = c("statut_inconnu", + "invalidee", + "validee"))) +table(x$status, useNA = "ifany") + +# another round of cleaning +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + + +``` + + +There are `r length(date_na)` missing dates in the data. + + + +## Variable creation + +The following variables are created: + +- `bleed` - which describes whether the alert displayed any of the bleeding related +symptoms. +- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert +was reported in. +- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de +Santes) are grouped into "other". +- `top_aires` - which describes the 14 most active Aire de Santes (out of all the +legitimate Aire de Santes belonging to Beni present in the database). Note - All +other Aire de Santes are aggregated into the category "other". + +- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. + + +Note - All other Aire de Santes are aggregated into the category "other". + +``` {r variable_creation} + +## bleed +bleed <- x %>% + select(contains("saignement"), + contains("sang"), + contains("hemorag") + ) %>% + apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) +x <- mutate(x, bleed = bleed) +table(x$bleed, useNA = "ifany") + +## epiweek report +x <- x %>% + mutate(epiweek_report = + aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% + mutate(epiweek_report_label = + aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) + + +x <- x %>% + mutate(top_zones = top_values(zone_de_sante, 5)) %>% + mutate(top_aires = top_values(aire_de_sante, 14)) + + +#create a variable for the final status of the alert +x <- x %>% + mutate(final_outcome = classification_finale_du_cas) %>% + mutate(final_outcome = factor(final_outcome, levels = c("confirme", + "non_cas", + "suspect"))) +``` + +One last round of dictionary-based cleaning: + +```{r last_cleaning} + +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + +Remove data from before 2019-01-01 as not as reliable. + +```{r remove_pre_2019} + +x <- x %>% + filter(date >= "2019-01-01") + +``` + + +## Outcomes + +Outcome is defined for alerts which have either been validated or invalidated. + +```{r outcomes} + +## get only known outcomes +outcomes <- x %>% + filter(status %in% c("validee", "invalidee")) %>% + droplevels() %>% + mutate(validee = 1 * (status == "validee")) + +``` + + +## Check admin areas are correct + +```{r load_area_names_data} + +## load the data +current_spatial +area_names <- rio::import(current_spatial) %>% + clean_data(guess_dates = FALSE) + +as_names <- unique(area_names$as) +zs_names <- unique(area_names$zs) + +``` + +### Identify unknown areas + +```{r identify_area_names_data} + +## load the data +n_rows <- nrow(x) + +unknown_as <- x %>% + filter(!aire_de_sante %in% as_names) + +most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) + +start_date <- database_date - 20 +unknown_as_3weeks <- x %>% + filter(!aire_de_sante %in% as_names, + date >= start_date) + +``` + + +There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. + +```{r show_unknown_area_names} + +table_unknown_as <- unknown_as %>% + count(zone_de_sante, aire_de_sante) + +table_unknown_as %>% + show_table() + +``` + + +## Filter to known aire de santes + +```{r known_aire_de_sante} + +x <- x %>% + filter(aire_de_sante %in% as_names) + +``` + + + +## Add population data + +Load the population data + +```{r load_pop_data} + +## load the data +current_pop +pop_data <- rio::import(current_pop) +glimpse(pop_data) + +pop_data <- pop_data %>% + clean_data(guess_dates = FALSE) %>% + as_tibble() +``` + +### Check for area not in alerts + +```{r filter_by_alerts} + +## load the data +pop_data <- pop_data %>% + filter(aire_de_sante %in% x$aire_de_sante, + zone_de_sante %in% x$zone_de_sante) + +pop_data + +## aire de sante population +aire_de_sante_pop <- pop_data %>% + mutate(as_population = population) %>% + select(zone_de_sante, aire_de_sante, as_population) + +## zone de sante population +zone_de_sante_pop <- pop_data %>% + group_by(zone_de_sante) %>% + summarise(zs_population = sum(population, na.rm = TRUE)) + +total_population <- sum(unique(pop_data$population), na.rm = TRUE) + +## adding populations to main data +x <- x %>% + left_join(aire_de_sante_pop, + by = c("aire_de_sante", "zone_de_sante")) %>% + left_join(zone_de_sante_pop, + by = c("zone_de_sante")) + +if(!allNA(x$as_population)){ + x <- x %>% + mutate(total_population = total_population) +} + + +``` + + + +## Check population linkage (review regularly) + +```{r check_population_linkage} + +# Calculating number of individuals successfully linked to population data +number_individuals <- nrow(x) +number_individuals_aire_linked <- sum(!is.na(x$as_population)) +prop_linked <- sum(number_individuals_aire_linked / number_individuals) + +# Compare these two quantities - ensure there aren't any mispellings we're missing +table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching + +# Identifying Aire de Santes with missing population data +beni_ha_pop <- pop_data$population +index <- which(is.na(beni_ha_pop)) +non_pop_data <- pop_data$aire_de_sante[index] + +# Identifying number of alerts linked to Aire de Sante with +# missing population data +alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) + +``` + +There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. + + +## Expected decisions + +Alerts are supposed to be validated using the following key, depending on +whether the person has: + +1. **known contact** with a case and at least **one symptom** +2. if not 1, **unexplained bleeding** +3. if not 2, **fever and 3 other symptoms** + +Note that 3 de facto excludes bleeding as a candidate symptom. We create a new +variable which asserts these conditions: + +```{r expected_result} + +## elements of diagnostic +## criteria 1 +has_contact <- outcomes$contact_connu %in% c("confirme", "oui") +has_one_symptom <- outcomes %>% + select(fievre, + nausees_vomissement, + diarrhees, + fatigue_intense, + perte_d_appetit_anorexie, + douleur_abdminales, + douleur_thoraciques, + douleur_musculaires, + douleurs_articulaires, + cephalees, + toux, + difficulte_respirer, + difficulte_a_avaler, + mal_a_la_gorge, + ictere_conjonctives_gencives_peau, + conjonctivite_oeil_rouge, + eruption_cutanees, + hoquet, + douleurs_retro_orbitaires_photophobie, + coma_perte_de_conscience, + confusion_ou_desorientation, + bleed) %>% + apply(1, function(e) any(e == "oui", na.rm = TRUE)) +fits_1 <- has_contact & has_one_symptom + +## criteria 2 +has_bleeding <- outcomes$bleed == "oui" +fits_2 <- !fits_1 & has_bleeding + +## criteria 3 +has_fever <- outcomes$fievre == "oui" +has_3_symptoms <- outcomes %>% + select(nausees_vomissement, + diarrhees, + fatigue_intense, + perte_d_appetit_anorexie, + douleur_abdminales, + douleur_thoraciques, + douleur_musculaires, + douleurs_articulaires, + cephalees, + toux, + difficulte_respirer, + difficulte_a_avaler, + mal_a_la_gorge, + ictere_conjonctives_gencives_peau, + conjonctivite_oeil_rouge, + eruption_cutanees, + hoquet, + douleurs_retro_orbitaires_photophobie, + coma_perte_de_conscience, + confusion_ou_desorientation, + bleed) %>% + apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 +fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms + + +## make sure criteria are exclusive - no number in the table below should exceed +## 1. i.e this table should only zero or one as categories. +table(fits_1 + fits_2 + fits_3) + +## expected decisions +outcomes <- outcomes %>% + mutate(admit_contact = fits_1, + admit_bleeding = fits_2, + admit_fever = fits_3) %>% + mutate(expected_decision = ifelse( + fits_1 | fits_2 | fits_3, + "validee", + "invalidee"), + decision_comparison = case_when( + status == "validee" & expected_decision == "validee" ~ + "true_positive", + status == "invalidee" & expected_decision == "invalidee" ~ + "true_negative", + status == "validee" & expected_decision == "invalidee" ~ + "false_positive", + status == "invalidee" & expected_decision == "validee" ~ + "false_negative", + TRUE ~ NA_character_ + ), + decision_comparison = + factor(decision_comparison, + levels = c("true_positive", + "true_negative", + "false_positive", + "false_negative"))) + +## remove alerts with missing comparisons, from 2019 +outcomes <- outcomes %>% + filter(!is.na(decision_comparison), + date >= as.Date("2019-01-01")) + + +## check that classification is well-made +outcomes %>% + group_by(status, expected_decision, decision_comparison) %>% + count() + + +``` + + + +## Last 21 days + +We duplicate the previous datasets, retaining the 21 days leading up to the +current database date. + +```{r subset_21_days} + +start_date <- database_date - 21 +x_recent <- filter(x, date > start_date) +outcomes_recent <- filter(outcomes, date > start_date) + +``` + + +## Alerts counts split + +Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. + +```{r alert_count_split} + +greater_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 100) %>% + pull(aire_de_sante) + +less_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 100) %>% + pull(aire_de_sante) + +less_100[less_100 %in% greater_100] <- NA +less_100 <- less_100[complete.cases(less_100)] + +greater_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 20) %>% + pull(aire_de_sante) +less_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 20) %>% + pull(aire_de_sante) + +less_20_recent[less_20_recent %in% greater_20_recent] <- NA +less_20_recent <- less_20_recent[complete.cases(less_20_recent)] + +``` + + + +## Custom color scales + +We define custom colors for some of the variables used in the plots. + +```{r scales_fill} + +scale_origins <- scale_fill_manual( + "Origine", + values = c(communautaire = "#ffcc00", + recherche_active = "#c3c388", + surveillance_passive = "#ff6699", + point_entree = "#40bf80", + autre = "#668cff", + inconnu = "grey", + check_cleaning_rules = "grey")) + +scale_decisions <- scale_fill_manual( + "Décisions", + values = c(true_positive = "#94b8b8", + true_negative = "#8c8cd9", + false_positive = "#ff8080", + false_negative = "#b3003b"), + labels = c(true_positive = "Validation correcte", + true_negative = "Invalidation correcte", + false_positive = "Fausse alerte", + false_negative = "Alerte manquée")) + +scale_validations <- scale_fill_manual( + "Outcome", + values = c(statut_inconnu = "#BCB4A4", + validee = "#D56F3E", + invalidee = "#F2C69B"), + labels = c(validee = "Validée", + invalidee = "Invalidée", + statut_inconnu = "Statut Inconnu")) + + +scale_final_outcome <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(confirme = "#ff9999", + suspect = "#264d73", + non_cas = "#8cb3d9"), + labels = c(confirme = "Cas confirme", + suspect= "Cas suspect", + non_cas = "Non-cas" + + )) + +scale_final_outcome_perc <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(perc_confirme = "#ff9999", + perc_suspect = "#264d73", + perc_non_cas = "#8cb3d9"), + labels = c(perc_confirme = "Cas confirme", + perc_suspect= "Cas suspect", + perc_non_cas = "Non-cas" + + )) + + +``` + + + + + + +# Validation status {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + +## Overall + +### Weekly, since database start + +```{r sous_coord_time} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation - Beni")) + + large_txt + + rotate_x_text(45) + + scale_months + + theme(legend.position = "bottom") + +``` + + +### Table - weekly since database start + +```{r sous_coord_time_table, fig.keep = "all"} + +table_validation_overall_time <- x %>% + count(epiweek_report, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_time %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r sous_coord_recent} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + scale_weeks + + theme(legend.position = "bottom") + +``` + +### Table - daily past 3 weeks + +```{r sous_coord_recent_table, fig.keep = "all"} + +table_validation_overall_past_3_weeks <- x_recent %>% + count(date, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_past_3_weeks %>% + show_table() + +``` + + +## Overall proportion validated + +```{r sous_co_proportion_validated} + +x_prop <- x %>% + count(epiweek_report_label, status) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + p = prop_to_perc(validee / total), + lci = prop_ci(validee, total, "lower", TRUE), + uci = prop_ci(validee, total, "upper", TRUE)) %>% + select(epiweek_report_label, p, lci, uci) + +ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + + geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + + geom_point(aes(y = p), size = 2) + + geom_line(aes(y = p), size = 1) + + scale_color_discrete(guide = FALSE) + + ylim(c(0, 100)) + + labs(x = "", + y = "Pourcentage d'alertes validées \npar semaine", + title = paste0("Proportion d'alertes validées - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total} + +ggplot(x, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et \nzone de santé - Beni") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_validation, fig.keep = "all"} + +table_hz_total_validation <- x %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation ", + " et zone de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_validation, fig.keep = "all"} + +table_hz_over_time_validation <- x %>% + count(epiweek_report, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_validation %>% + show_table() + +``` + + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent} + +ggplot(x_recent, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation", + "et \nzone de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_validation, fig.keep = "all"} + +table_hz_total_recent_validation <- x_recent %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_validation %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation ", + "et zone de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_validation <- x_recent %>% + count(date, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_validation %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table + +```{r health_area_total_greater100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Beni", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + + + +``` + +```{r health_area_total_less100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100, n >= 5) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Beni", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total since database start + +```{r table_ha_total_validation, fig.keep = "all"} + +table_ha_total_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = epiweek_report_label, fill = status)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_validation, fig.keep = "all"} + +table_ha_over_time_validation <- x %>% + count(epiweek_report, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_validation %>% + show_table() + +``` + + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table + +```{r health_area_total_recent_greater20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + " (avec plus de 20 alertes) - Beni"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_recent_less20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + " (avec moins de 20 alertes) - Beni "), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_validation, fig.keep = "all"} + +table_ha_total_recent_validation <- x_recent %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_validation %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent, fig.width = 12, fig.height = 8} + +x_validations <- x_recent %>% + filter(top_aires != "other") %>% + count(date, status, top_aires) + +ggplot(x_validations, aes(x = date, y = n, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_validation <- x_recent %>% + count(date, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_validation %>% + show_table() + +``` + + + + + +# Origins {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their origin + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + + +## Overall + +### Weekly, since database start + +```{r sous_coord_origins} + +ggplot(x, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = "Nombre d'alertes par origine - Beni") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +```{r sous_coord_time_table_origins, fig.keep = "all"} + +table_origins <- x %>% + count(epiweek_report, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r origins_time_recent} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + rotate_x_text(45) + + large_txt + + theme(legend.position = "bottom", + panel.spacing.y = unit(1, "lines")) + + labs(title = "Nombre d'alertes par origine - Beni", +subtitle = "Données des trois dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_origins_recent, fig.keep = "all"} + +table_origins_recent <- x_recent %>% + count(date, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins_recent %>% + show_table() + +``` + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_origin} + +ggplot(x, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et zone de santé - Beni") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_origins, fig.keep = "all"} + +table_hz_total_origins <- x %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_origin, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + "et zone de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_origins, fig.keep = "all"} + +table_hz_over_time_origins <- x %>% + count(epiweek_report, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_origins %>% + show_table() + +``` + + +## Health Zone - Past 3 Weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_origin_recent} + +ggplot(x_recent, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et zone de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_origins, fig.keep = "all"} + +table_hz_total_recent_origins <- x_recent %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_origins %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine ", + "et zone de sante - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_origins <- x_recent %>% + count(date, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_origins %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_origins_greater100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Beni", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_origins_less100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Beni", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` +### Table - total since database start + + +```{r table_ha_total_origins, fig.keep = "all"} + +table_ha_total_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_origin, fig.width = 12, fig.height = 8} + +x_origins <- x %>% + filter(top_aires != "other") + +ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + " et zone de sante - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_origins, fig.keep = "all"} + +table_ha_over_time_origins <- x %>% + count(epiweek_report, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_origins %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +``` {r health_area_total_recent_origin_greater20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé (avec plus de 20 alertes) - Beni"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_recent_origin_less20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé (avec moins de 20 alertes) - Beni"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - Total past 3 weeks + +```{r table_ha_total_recent_origins, fig.keep = "all"} + +table_ha_total_recent_origins <- x_recent %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_origins %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} + +x_origins_recent <- x_recent %>% + filter(top_aires != "other") %>% + count(date, origin, top_aires) + +ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + " et aire de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_origins <- x_recent %>% + count(date, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_origins %>% + show_table() + +``` + + + + +# Validation decisions {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section focuses on the decision of alerts, by health zones. + +When looking at decisions on the treatment of alerts, we identify the 4 +following situations: + +* **true positive**: alerts were rightfully + validated (patient tested) +* **true negative**: alerts were rightfully not + validated (patient not tested) +* **false positive**: alerts were wrongly + validated (patient tested, shoud not have been) +* **false negative**: alerts were wrongly not + validated (patient not tested, should have been) + +**False positive** create a waste of resources as well as un-necessary pressure +on the patient and community. **False negative** create a risk of missing cases. + +For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + +## Overall + +### Weekly, since database start + +```{r decisions} + +ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar(color = "white") + + scale_x_discrete(drop = FALSE) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation - Beni") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + scale_decisions + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +``` {r table_decisions} + +table_decisions <- outcomes %>% + count(epiweek_report, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(c("row", "col")) %>% + mutate(prop_false_positive_95ci = + prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), + prop_false_negative_95ci = + prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), + prop_false_positive = prop_to_perc(false_positive / Total), + lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), + upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), + prop_false_negative = prop_to_perc(false_negative / Total), + lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), + upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) + +table_decisions %>% + select(-prop_false_positive, + -lower_false_positive, + -upper_false_positive, + -prop_false_negative , + -lower_false_negative, + -upper_false_negative + ) %>% + show_table() + +``` + + +### Table - incorrectly validated/not validated + +* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. +* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. + +```{r table_sens_spec} + +table_sens_spec <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + mutate(total_tested = true_positive + false_positive, + total_not_tested = true_negative + false_negative) %>% + mutate(incorrectly_validated_95ci = + prop_to_display_ci(false_positive, total_tested, + dec = 2, perc = TRUE), + incorrectly_not_validated_95ci = + prop_to_display_ci(false_negative, total_not_tested, + dec = 2, perc = TRUE)) +table_sens_spec %>% + show_table() + +``` + +`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. + +**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. + + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r decisions_time} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + rotate_x_text(45) + + large_txt + + scale_decisions + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + labs(title = paste("Nombre d'alertes par décision de validation - Beni"), +subtitle = "Données des trois dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Over time table + +``` {r table_decisions_recent} + +table_decisions_recent <- incidence(outcomes_recent$date, "day", + groups = outcomes_recent$decision_comparison) %>% + as.data.frame() %>% + adorn_totals(where = c("row", "col")) + +table_decisions_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_decision} + +ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision et zone de santé - Beni") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + rotate_x_text(45) + +``` + +### Table - Total since database start + +```{r table_hz_total_decisions, fig.keep = "all"} + +table_hz_total_decisions <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_decision, fig.width = 12, fig.height = 8} + +ggplot(outcomes, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_decisions, fig.keep = "all"} + +table_hz_over_time_decisions <- outcomes %>% + count(epiweek_report, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_decisions %>% + show_table() + +``` + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_decision_recent} + +ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision et zone de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_decisions, fig.keep = "all"} + +table_hz_total_recent_decisions <- outcomes_recent %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_decisions %>% + show_table() + +``` + +### Daily past 3 weeks + +```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de sante - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_decisions %>% + show_table() + +``` + + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_decisionsgreater100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Beni", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_decisionsless100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Beni", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_decision, fig.keep = "all"} + +table_ha_total_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_decision, fig.width = 12, fig.height = 8} + +x_decisions <- outcomes %>% + filter(top_aires != "other") + +ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de sante - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", + legend.title = element_text(size = 12), + legend.text = element_text(size = 10)) + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_decisions, fig.keep = "all"} + +table_ha_over_time_decisions <- outcomes %>% + count(epiweek_report, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_decisions %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +```{r health_area_total_recent_decision_greater20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé (avec plus de 20 alertes) - Beni"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_recent_decision_less20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé (avec moins de 20 alertes) - Beni"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total past 3 weeks + +```{r table_ha_total_recent_decisions, fig.keep = "all"} + +table_ha_total_recent_decisions <- outcomes_recent %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_decisions %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} + +x_decisions_recent <- outcomes_recent %>% + filter(top_aires != "other") + +ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_decisions} + +table_ha_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_decisions %>% + show_table() + +``` + + + + + + +# Alert Rates {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. + +**Note:** Rate are only calculated for areas with population data. There +are `r length(non_pop_data)` Aire de Santes without population data, which totals + **`r alerts_no_pop`** alerts. + +**Note: All known Aire de Santes are reported in tables** + + + +```{r reactivate_alert_rates, include = FALSE} + +if(allNA(x$as_population)){ + knitr::opts_chunk$set(eval = FALSE) + no_pop_data <- TRUE + cat("There is no population data therefore alert rates can not be calculated.") +} + + +``` + + + +## Overall + +### Weekly, Since Database Start + +```{r alert_rates} + +x_alert_rate_sous_co <- x %>% + count(epiweek_report_label, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, + aes(x = epiweek_report_label, y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + +### Table - Weekly Since Database Start + +```{r table_alert_rates_per_week, fig.keep = "all"} + +table_alert_rates_per_week <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_per_week %>% + show_table() + +``` + + + +## Overall Past 3 Weeks + +### Daily, Past 3 Weeks + +``` {r alert_rates_recent} + +x_alert_rate_sous_co <- x_recent %>% + count(date, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - Weekly Since Database Start + +```{r table_alert_rates_recent, fig.keep = "all"} + +table_alert_rates_recent <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total, Since Database Start + +``` {r alert_rate_zone_de_sante_total_validation_status} + +date_span <- database_date - min(x$date, na.rm = TRUE) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(status, zone_de_sante, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(epiweek_report_label, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation, et zone de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + + +## Health Zone Past 3 Weeks + +### Total, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + as_population > 0, + top_zones != "other") %>% + count(status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * 3)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + theme(legend.position = "bottom") + + +``` + +### Daily, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + zs_population > 0, + top_zones != "other") %>% + count(date, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + + + +## Health Area + +### Total, Since Database Start + +``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} + +date_span <- database_date - min(x$date) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, top_zones, aire_de_sante, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut ", + "de validation et aire de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(top_aires != "other", + as_population > 0) %>% + count(epiweek_report_label, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / as_population) + +ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé - Beni")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + +## Health Area - Past 3 Weeks + +``` {r alert_rate_as_3_weeks, fig.width = 14} + +x_alert_rate_aire_sante_recent <- x_recent %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, aire_de_sante, top_zones, as_population) %>% + mutate(alert_rate = (10000 * n) / (as_population * 3)) + +ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par 10000 \npersonnes et par semaine", + title = paste0("Nombre d'alertes par 10000 personnes et par semaine - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + +``` + + +### Daily, Past 3 Weeks + +``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x_recent %>% + filter( + top_aires != "other", + as_population > 0) %>% + count(date, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population)) + +ggplot(data = x_alert_rate_aire_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut - Beni"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + + + + +# Final status of alerts {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section investigates information on the proportions of false positive and false negative that became real cases. + + + +## Overall final status by status decision comparison + +###Absolute number of alerts by final status and status decision comparison + +```{r final_status} + +outcomes %>% + filter(!is.na(final_outcome)) %>% +ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_x", space = "free")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision + +```{r final_status_percentage} + +perc_final_outcome <- outcomes %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_non_cas = prop_to_perc(non_cas/total), + perc_suspect = prop_to_perc(suspect/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + +perc_final_outcome %>% +ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +##Final status for the past 21 days + +###Absolute number of alerts by final status and status decision comparison for the past 21 days + +```{r final_status_recent} + + +outcomes_recent %>% + filter(!is.na(final_outcome)) %>% +ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +###Percentage of final status by alert status decision for the past 21 days + +```{r final_status_percentage_recent} + +perc_final_outcome_recent <- outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_non_cas = prop_to_perc(non_cas/total), + perc_suspect = prop_to_perc(suspect/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + + +perc_final_outcome_recent %>% +ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +### Table - final status overall + +```{r table_final_status, fig.keep = "all"} + +perc_final_outcome %>% + show_table() + +``` + + + +### Table - final status for the past 21 days + +```{r table_final_status_recent, fig.keep = "all"} + +perc_final_outcome_recent %>% + show_table() + +``` + + + + + + +# Export data and tables {.tabset .tabset-fade .tabset-pills} + + +```{r reactivate_recent, include = FALSE} +knitr::opts_chunk$set(eval = TRUE) +``` + + +## Outline + +We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current +working directory. + + + + +## Export clean data + +We export some of the clean database, placed in `produced_rds/` as well as in +`data/clean/`: + +```{r export_rds, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +## create the text for the file name with the database date +rds_file_name <- sprintf("%sclean_%s.rds", + undated_file_name(current_beni), + format(database_date, "%Y-%m-%d")) +rds_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_rds", rds_file_name)) + +``` + +We copy these files to the `data/clean` folder: + +```{r copy_rds, eval = TRUE} +# copy some files into `data/clean/` + +if (!dir.exists("data/clean")) { + dir.create("data/clean") +} + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + rds_file_name) +# Copy the rds data +file.copy(from = file.path("produced_rds", rds_file_name), + to = destination, + overwrite = TRUE) + +``` + + + + +## Excel files + +The following code exports all tables named in `to_report` to `xslx` files, +stored inside the folder `produced_xlsx`: + +### Cleaned alerts database + +```{r exports_tables, eval = TRUE} + +cleaned_alerts_database_beni <- x + +to_export <- c("cleaned_alerts_database_beni", + "table_validation_overall_time", + "table_validation_overall_past_3_weeks", + "table_hz_total_validation", + "table_hz_over_time_validation", + "table_hz_total_recent_validation", + "table_hz_over_time_recent_validation", + "table_ha_total_validation", + "table_ha_over_time_validation", + "table_ha_total_recent_validation", + "table_ha_over_time_recent_validation", + "table_origins", + "table_origins_recent", + "table_hz_total_origins", + "table_hz_over_time_origins", + "table_hz_total_recent_origins", + "table_hz_over_time_recent_origins", + "table_ha_total_origins", + "table_ha_over_time_origins", + "table_ha_total_recent_origins", + "table_ha_over_time_recent_origins", + "table_decisions", + "table_sens_spec", + "table_decisions_recent", + "table_hz_total_decisions", + "table_hz_over_time_decisions", + "table_hz_total_recent_decisions", + "table_hz_over_time_recent_decisions", + "table_ha_total_decisions", + "table_ha_over_time_decisions", + "table_ha_total_recent_decisions", + "table_ha_over_time_recent_decisions", + "perc_final_outcome", + "perc_final_outcome_recent", + "table_unknown_as") + +``` + +```{r xlsx_exports, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_xlsx")) { + dir.create("produced_xlsx") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_xlsx", + paste0(e, ".xlsx"))) +} + +``` + + +We copy the main data file to the `data/clean` folder: + +```{r export_xlsx, eval = TRUE} + +## create the text for the file name with the database date +xlsx_file_name <- sprintf("%sclean_%s.xlsx", + undated_file_name(current_beni), + format(database_date, "%Y-%m-%d")) +xlsx_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_xlsx", xlsx_file_name)) + +``` + + +```{r copy_xlsx, eval = TRUE} +# copy some files into `data/clean/` + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + xlsx_file_name) +# Copy the rds data +file.copy(from = file.path("produced_xlsx", xlsx_file_name), + to = destination, + overwrite = TRUE) + +``` + + + +Click on the following links to open the files (only works if the files above +have been generated and are in the same folder as this document): + + +```{r xlsx_links, results = "asis", eval = TRUE} + +for (e in to_export) { + txt <- sprintf("- [%s.xlsx](%s.xlsx)", + e, + file.path("produced_xlsx", + e)) + cat(txt, sep = "\n") +} + +``` + + + +## R objects + +The following code exports all tables named in `to_report` to `rds` files, +stored inside the folder `produced_rds`: + +```{r rds_exports, eval = TRUE} + +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_rds", + paste0(e, ".rds"))) +} + +``` + + + + + +# System information {.tabset .tabset-fade .tabset-pills} + + +## Outline + +The following information documents the system on which the document was +compiled. + + + +## System + +This provides information on the operating system. + +```{r system_info} +Sys.info() +``` + + +## R environment + +This provides information on the version of R used: + +```{r R_session} +R.version +``` + + + +## R packages + +This provides information on the packages used: + +```{r R_pkg} +sessionInfo() +``` + + +## Compilation parameters + +This shows which parameters were passed through `params` at compilation time: + +```{r params} +params +``` diff --git a/alerts/report_sources/alerts_bunia_2019-12-10.Rmd b/alerts/report_sources/alerts_bunia_2019-12-31.Rmd similarity index 96% rename from alerts/report_sources/alerts_bunia_2019-12-10.Rmd rename to alerts/report_sources/alerts_bunia_2019-12-31.Rmd index 351615a..9a9ddd2 100644 --- a/alerts/report_sources/alerts_bunia_2019-12-10.Rmd +++ b/alerts/report_sources/alerts_bunia_2019-12-31.Rmd @@ -1,2655 +1,2671 @@ ---- -title: "Investigation of alerts data: Bunia" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma and Aminata Ndiaye for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Bunia. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_bunia -x_raw <- custom_import(current_bunia) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_bunia) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epidemiologique) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_de_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_de_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_de_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse( conclusion_finale_de_l_investigation == "inconnu", "statut_inconnu", conclusion_finale_de_l_investigation)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Bunia present in the database). - -Note - All other Aire de Santes are aggregated into the category "other". - -- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) - - -#create a variable for the final status of the alert -x <- x %>% - mutate(final_outcome = classification_finale_du_cas) %>% - mutate(final_outcome = factor(final_outcome, levels = c("confirme", - "non_cas", - "suspect"))) - -#table(x$final_outcome, x$status, useNA = "ifany") - -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -bunia_ha_pop <- pop_data$population -index <- which(is.na(bunia_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vomissment, - diarrhee, - fatigue_asthenie, - anorexie_perte_d_appetit, - douleurs_abdominales, - douleur_thoracique, - douleur_musculaire, - douleur_articulaire, - cephalee, - toux, - diffciulties_a_respirer, - difficulties_a_avaler, - mal_a_la_gorge, - hoquet, - saignements_inexpliques) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vomissment, - diarrhee, - fatigue_asthenie, - anorexie_perte_d_appetit, - douleurs_abdominales, - douleur_thoracique, - douleur_musculaire, - douleur_articulaire, - cephalee, - toux, - diffciulties_a_respirer, - difficulties_a_avaler, - mal_a_la_gorge, - hoquet, - saignements_inexpliques) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 21 -x_recent <- filter(x, date > start_date) -outcomes_recent <- filter(outcomes, date > start_date) - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "Validation correcte", - true_negative = "Invalidation correcte", - false_positive = "Fausse alerte", - false_negative = "Alerte manquée")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - - -scale_final_outcome <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(confirme = "#ff9999", - suspect = "#264d73", - non_cas = "#8cb3d9"), - labels = c(confirme = "Cas confirme", - suspect= "Cas suspect", - non_cas = "Non-cas" - - )) - -scale_final_outcome_perc <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(perc_confirme = "#ff9999", - perc_suspect = "#264d73", - perc_non_cas = "#8cb3d9"), - labels = c(perc_confirme = "Cas confirme", - perc_suspect= "Cas suspect", - perc_non_cas = "Non-cas" - - )) - - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Bunia")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Bunia")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé", - " depuis les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -```{r health_area_total, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent, fig.width = 14} - -x_validations <- x %>% - filter(date > start_date) %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " depuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - filter(date > start_date) %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x %>% - filter(date > start_date) %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x %>% - filter(date > start_date) %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine et par semaine") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine et par semaine", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de santé - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé depuis\n", - "les 3 semaines dernières")) + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 100 alerts removed from graph but kept in table. - -``` {r health_area_total_origins, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et zone de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 20 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin, fig.width = 14} - -x_origins <- x %>% - filter(date > start_date) %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x %>% - filter(top_aires != "other", - date > start_date) %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x %>% - filter(date > start_date) %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". - - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - labs(title = paste("Nombre d'alertes par décision de validation"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé depuis", - "\nles 3 semaines dernières")) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 100 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision, fig.width = 14} - -x_decisions <- outcomes %>% - filter(date > start_date) %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(n >= 20) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\ndepuis les 3 semaines dernières"), - subtitle = "avec plus de 20 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes %>% - filter(date > start_date) %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "la sous-coordination de Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes pour \n", - "la sous-coordination de Bunia")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé depuis le 3 semaines", - " \ndernières")) + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x %>% - filter(top_zones != "other", - date > start_date, - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes et par semaine", - "\npour les 3 semaines dernières")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(date > start_date, - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - - - -# Final status of alerts {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section investigates information on the proportions of false positive and false negative that became real cases. - - -## Overall final status by status decision comparison - -###Absolute number of alerts by final status and status decision comparison - -```{r final_status} - -outcomes %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_x", space = "free")+ - scale_final_outcome + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \nrègle de décision et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -###Percentage of final status by alert status decision - -```{r final_status_percentage} - -perc_final_outcome <- outcomes %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas, - perc_confirme = prop_to_perc(confirme/total), - perc_non_cas = prop_to_perc(non_cas/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - -perc_final_outcome %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -##Final status for the past 21 days - -###Absolute number of alerts by final status and status decision comparison for the past 21 days - -```{r final_status_recent} - - -outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -###Percentage of final status by alert status decision for the past 21 days - -```{r final_status_percentage_recent} - -perc_final_outcome_recent <- outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= non_cas , - perc_non_cas = prop_to_perc(non_cas/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - - -perc_final_outcome_recent %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -### Table - final status overall - -```{r table_final_status, fig.keep = "all"} - -perc_final_outcome %>% - show_table() - -``` - - - -### Table - final status for the past 21 days - -```{r table_final_status_recent, fig.keep = "all"} - -perc_final_outcome_recent %>% - show_table() - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_bunia), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_bunia <- x - -to_export <- c("cleaned_alerts_database_bunia", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "perc_final_outcome", - "perc_final_outcome_recent", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_bunia), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` +--- +title: "Investigation of alerts data: Bunia" +author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, Aminata Ndiaye, and Emma Glennon for the analytic cell OEC Goma" +date: "`r format(Sys.time(), '%A %d %B %Y')`" +output: + html_document: + code_folding: hide + highlight: zenburn + number_sections: yes + theme: spacelab + toc: yes + toc_collapse: no + toc_depth: 1 + toc_float: yes + css: !expr here::here('css', 'style.css') +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(echo = TRUE, + eval = TRUE, + collapse = TRUE, + fig.width = 8, + fig.height = 6, + dpi = 150, + warning = FALSE, + message = FALSE, + fig.path = "figures/") +``` + + +
+ +**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) + +**Code contributors:** Chris Jarvis, Charlie Whittaker + +**Data contributors:** Surveillance team + +**Version:** 1.0.0 + +**Reviewed by:** Thibaut Jombart + +**Notice**: this is a **stable, routine report**. **Do not touch it unless it is +broken.** To make a contribution, carefully read the +[README](../../../../../README.html) file. + + + + + + + + +# Data preparation {.tabset .tabset-fade .tabset-pills} + + + +## Outline + +This report cleans and analyses the alert data of Bunia. Input comes from an +`xlsx` file containing alerts in a specific format. Because all +sub-coordinations have different standards, each sub-coordination needs a +separate report. + +The data preparation involves the following steps, detailed in the following +tabs: + +* **Load scripts**: loads libraries and useful scripts used in the analyses; all + `.R` files contained in `scripts` at the root of the factory are automatically + loaded + +* **Load data**: imports datasets, and may contain some *ad hoc* changes to the +data such as specific data cleaning (not used in other reports), new variables +used in the analyses, etc. + +* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is + not used in other reports (otherwise cleaning should be done in a dedicated + report); this section is also used to create new variables used in the + analyses + +## Load scripts + +These scripts will load: + +* all local scripts, stored as `.R` filesinside `/scripts/` +* all Overall scripts, i.e. stored outside the factory in `../scripts/` + +```{r read_scripts} + +## read scripts +path_to_scripts <- here::here("scripts") +scripts_files <- dir(path_to_scripts, pattern = ".R$", + full.names = TRUE) +for (file in scripts_files) source(file, local = TRUE) + +ggthemr("fresh") + +``` + + + +## Load alerts data + +We extract the completion date from the file name: + +```{r load_alerts_data} + +## load the data +current_bunia +x_raw <- custom_import(current_bunia) +glimpse(x_raw) + +## extract database date from the file name +file_name <- gsub("^[^.]+/", "", current_bunia) +database_date <- file_name %>% + guess_dates() +database_date + +``` + +The **completion date** of the database is **`r format(database_date, format = +"%A %d %b %Y")`**. + + +## Clean data + +We use *linelist*'s function `clean_data()` to: + +- remove all special characters from the data +- set all characters to lower case +- replace all accentuated and diacritic characters with their closest ascii + match in the latin alphabet +- replace all separators with a single `_` +- replace all mis-spelling using a Overall dictionary (see the file + `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R + session, after compiling the document +- (optionally) detect date formats and convert data to dates, including cases + where format varies within a column + + +This cleaning is achieved with: + +```{r data_cleaning} + +x <- x_raw %>% + clean_data(guess_dates = FALSE, + wordlists = cleaning_rules) %>% + as_tibble() + +``` + + +## Renaming variables and sanity checks + +The following variables will be used, and are therefore checked: + +- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` +- sanitize `genre` +- make `origin` as a sanitized version of `source_notif` + +```{r check_variables} + +## date of notification +x <- x %>% mutate(date = guess_dates(date)) +range(x$date, na.rm = TRUE) + +# REVIEW: Remove NA dates +date_na <- sum(is.na(x$date)) +x <- x %>% filter(!is.na(date)) +# Check that na dates are removed + +## gender +table(x$sexe, useNA = "ifany") + +## contact connu +x <- mutate(x, contact_connu = lien_epidemiologique) +table(x$contact_connu, useNA = "ifany") + +## zone de sante +x <- mutate(x, zone_de_sante = as.character(zone_de_sante)) +table(x$zone_de_sante, useNA = "ifany") + +## aire de sante +x <- mutate(x, aire_de_sante = as.character(aire_de_sante)) +table(x$aire_de_sante, useNA = "ifany") + +## origin / source_notif +x <- mutate(x, origin = type_de_surveillance) +table(x$origin, useNA = "ifany") + +# create variable for alert validation +x <- x %>% mutate( + status = ifelse( conclusion_finale_de_l_investigation == "inconnu", "statut_inconnu", conclusion_finale_de_l_investigation)) %>% + mutate(status = factor(status, levels = c("statut_inconnu", + "invalidee", + "validee"))) +table(x$status, useNA = "ifany") + + +# another round of cleaning +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + + +There are `r length(date_na)` missing dates in the data. + + + +## Variable creation + +The following variables are created: + +- `bleed` - which describes whether the alert displayed any of the bleeding related +symptoms. +- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert +was reported in. +- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de +Santes) are grouped into "other". +- `top_aires` - which describes the 14 most active Aire de Santes (out of all the +legitimate Aire de Santes belonging to Bunia present in the database). + +Note - All other Aire de Santes are aggregated into the category "other". + +- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. + +``` {r variable_creation} + +## bleed +bleed <- x %>% + select(contains("saignement")) %>% + apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) +x <- mutate(x, bleed = bleed) +table(x$bleed, useNA = "ifany") + +## epiweek report +x <- x %>% + mutate(epiweek_report = + aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% + mutate(epiweek_report_label = + aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) + + + +x <- x %>% + mutate(top_zones = top_values(zone_de_sante, 5)) %>% + mutate(top_aires = top_values(aire_de_sante, 14)) + + +#create a variable for the final status of the alert +x <- x %>% + mutate(final_outcome = classification_finale_du_cas) %>% + mutate(final_outcome = factor(final_outcome, levels = c("confirme", + "non_cas", + "suspect"))) + +#table(x$final_outcome, x$status, useNA = "ifany") + +``` + +One last round of dictionary-based cleaning: + +```{r last_cleaning} + +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + +Remove data from before 2019-01-01 as not as reliable. + +```{r remove_pre_2019} + +x <- x %>% + filter(date >= "2019-01-01") + +``` + + +## Outcomes + +Outcome is defined for alerts which have either been validated or invalidated. + +```{r outcomes} + +## get only known outcomes +outcomes <- x %>% + filter(status %in% c("validee", "invalidee")) %>% + droplevels() %>% + mutate(validee = 1 * (status == "validee")) + +``` + + +## Check admin areas are correct + +```{r load_area_names_data} + +## load the data +current_spatial +area_names <- rio::import(current_spatial) %>% + clean_data(guess_dates = FALSE) + +as_names <- unique(area_names$as) +zs_names <- unique(area_names$zs) + +``` + +### Identify unknown areas + +```{r identify_area_names_data} + +## load the data +n_rows <- nrow(x) + +unknown_as <- x %>% + filter(!aire_de_sante %in% as_names) + +most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) + +start_date <- database_date - 20 +unknown_as_3weeks <- x %>% + filter(!aire_de_sante %in% as_names, + date >= start_date) + +``` + + +There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. + +```{r show_unknown_area_names} + +table_unknown_as <- unknown_as %>% + count(zone_de_sante, aire_de_sante) + +table_unknown_as %>% + show_table() + +``` + + +## Filter to known aire de santes + +```{r known_aire_de_sante} + +x <- x %>% + filter(aire_de_sante %in% as_names) + +``` + + + +## Add population data + +Load the population data + +```{r load_pop_data} + +## load the data +current_pop +pop_data <- rio::import(current_pop) +glimpse(pop_data) + +pop_data <- pop_data %>% + clean_data(guess_dates = FALSE) %>% + as_tibble() +``` + +### Check for area not in alerts + +```{r filter_by_alerts} + +## load the data +pop_data <- pop_data %>% + filter(aire_de_sante %in% x$aire_de_sante, + zone_de_sante %in% x$zone_de_sante) + +pop_data + +## aire de sante population +aire_de_sante_pop <- pop_data %>% + mutate(as_population = population) %>% + select(zone_de_sante, aire_de_sante, as_population) + +## zone de sante population +zone_de_sante_pop <- pop_data %>% + group_by(zone_de_sante) %>% + summarise(zs_population = sum(population, na.rm = TRUE)) + +total_population <- sum(unique(pop_data$population), na.rm = TRUE) + +## adding populations to main data +x <- x %>% + left_join(aire_de_sante_pop, + by = c("aire_de_sante", "zone_de_sante")) %>% + left_join(zone_de_sante_pop, + by = c("zone_de_sante")) + +if(!allNA(x$as_population)){ + x <- x %>% + mutate(total_population = total_population) +} + + +``` + + + +## Check population linkage (review regularly) + +```{r check_population_linkage} + +# Calculating number of individuals successfully linked to population data +number_individuals <- nrow(x) +number_individuals_aire_linked <- sum(!is.na(x$as_population)) +prop_linked <- sum(number_individuals_aire_linked / number_individuals) + +# Compare these two quantities - ensure there aren't any mispellings we're missing +table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching + +# Identifying Aire de Santes with missing population data +bunia_ha_pop <- pop_data$population +index <- which(is.na(bunia_ha_pop)) +non_pop_data <- pop_data$aire_de_sante[index] + +# Identifying number of alerts linked to Aire de Sante with +# missing population data +alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) + +``` + +There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. + + +## Expected decisions + +Alerts are supposed to be validated using the following key, depending on +whether the person has: + +1. **known contact** with a case and at least **one symptom** +2. if not 1, **unexplained bleeding** +3. if not 2, **fever and 3 other symptoms** + +Note that 3 de facto excludes bleeding as a candidate symptom. We create a new +variable which asserts these conditions: + +```{r expected_result} + +## elements of diagnostic +## criteria 1 +has_contact <- outcomes$contact_connu %in% c("confirme", "oui") +has_one_symptom <- outcomes %>% + select(fievre, + nausee_vomissment, + diarrhee, + fatigue_asthenie, + anorexie_perte_d_appetit, + douleurs_abdominales, + douleur_thoracique, + douleur_musculaire, + douleur_articulaire, + cephalee, + toux, + diffciulties_a_respirer, + difficulties_a_avaler, + mal_a_la_gorge, + hoquet, + saignements_inexpliques) %>% + apply(1, function(e) any(e == "oui", na.rm = TRUE)) +fits_1 <- has_contact & has_one_symptom + +## criteria 2 +has_bleeding <- outcomes$bleed == "oui" +fits_2 <- !fits_1 & has_bleeding + +## criteria 3 +has_fever <- outcomes$fievre == "oui" +has_3_symptoms <- outcomes %>% + select(nausee_vomissment, + diarrhee, + fatigue_asthenie, + anorexie_perte_d_appetit, + douleurs_abdominales, + douleur_thoracique, + douleur_musculaire, + douleur_articulaire, + cephalee, + toux, + diffciulties_a_respirer, + difficulties_a_avaler, + mal_a_la_gorge, + hoquet, + saignements_inexpliques) %>% + apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 +fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms + + +## make sure criteria are exclusive - no number in the table below should exceed +## 1. i.e this table should only zero or one as categories. +table(fits_1 + fits_2 + fits_3) + +## expected decisions +outcomes <- outcomes %>% + mutate(admit_contact = fits_1, + admit_bleeding = fits_2, + admit_fever = fits_3) %>% + mutate(expected_decision = ifelse( + fits_1 | fits_2 | fits_3, + "validee", + "invalidee"), + decision_comparison = case_when( + status == "validee" & expected_decision == "validee" ~ + "true_positive", + status == "invalidee" & expected_decision == "invalidee" ~ + "true_negative", + status == "validee" & expected_decision == "invalidee" ~ + "false_positive", + status == "invalidee" & expected_decision == "validee" ~ + "false_negative", + TRUE ~ NA_character_ + ), + decision_comparison = + factor(decision_comparison, + levels = c("true_positive", + "true_negative", + "false_positive", + "false_negative"))) + +## remove alerts with missing comparisons, from 2019 +outcomes <- outcomes %>% + filter(!is.na(decision_comparison), + date >= as.Date("2019-01-01")) + + +## check that classification is well-made +outcomes %>% + group_by(status, expected_decision, decision_comparison) %>% + count() + + +``` + + + +## Last 21 days + +We duplicate the previous datasets, retaining the 21 days leading up to the +current database date. + +```{r subset_21_days} + +start_date <- database_date - 21 +x_recent <- filter(x, date > start_date) +outcomes_recent <- filter(outcomes, date > start_date) + +``` + + +## Custom color scales + +We define custom colors for some of the variables used in the plots. + +```{r scales_fill} + +scale_origins <- scale_fill_manual( + "Origine", + values = c(communautaire = "#ffcc00", + recherche_active = "#c3c388", + surveillance_passive = "#ff6699", + point_entree = "#40bf80", + autre = "#668cff", + inconnu = "grey", + check_cleaning_rules = "grey")) + +scale_decisions <- scale_fill_manual( + "Décisions", + values = c(true_positive = "#94b8b8", + true_negative = "#8c8cd9", + false_positive = "#ff8080", + false_negative = "#b3003b"), + labels = c(true_positive = "Validation correcte", + true_negative = "Invalidation correcte", + false_positive = "Fausse alerte", + false_negative = "Alerte manquée")) + +scale_validations <- scale_fill_manual( + "Outcome", + values = c(statut_inconnu = "#BCB4A4", + validee = "#D56F3E", + invalidee = "#F2C69B"), + labels = c(validee = "Validée", + invalidee = "Invalidée", + statut_inconnu = "Statut Inconnu")) + + +scale_final_outcome <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(confirme = "#ff9999", + suspect = "#264d73", + non_cas = "#8cb3d9"), + labels = c(confirme = "Cas confirme", + suspect= "Cas suspect", + non_cas = "Non-cas" + + )) + +scale_final_outcome_perc <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(perc_confirme = "#ff9999", + perc_suspect = "#264d73", + perc_non_cas = "#8cb3d9"), + labels = c(perc_confirme = "Cas confirme", + perc_suspect= "Cas suspect", + perc_non_cas = "Non-cas" + + )) + + +``` + + + + + + +# Validation status {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + +## Overall + +### Weekly, since database start + +```{r sous_coord_time} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation - Bunia")) + + large_txt + + rotate_x_text(45) + + scale_months + + theme(legend.position = "bottom") + +``` + + +### Table - weekly since database start + +```{r sous_coord_time_table, fig.keep = "all"} + +table_validation_overall_time <- x %>% + count(epiweek_report, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_time %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r sous_coord_recent} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation - Bunia")) + + large_txt + + rotate_x_text(45) + + scale_weeks + + theme(legend.position = "bottom") + +``` + +### Table - daily past 3 weeks + +```{r sous_coord_recent_table, fig.keep = "all"} + +table_validation_overall_past_3_weeks <- x_recent %>% + count(date, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_past_3_weeks %>% + show_table() + +``` + + +## Overall proportion validated + +```{r sous_co_proportion_validated} + +x_prop <- x %>% + count(epiweek_report_label, status) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + p = prop_to_perc(validee / total), + lci = prop_ci(validee, total, "lower", TRUE), + uci = prop_ci(validee, total, "upper", TRUE)) %>% + select(epiweek_report_label, p, lci, uci) + +ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + + geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + + geom_point(aes(y = p), size = 2) + + geom_line(aes(y = p), size = 1) + + scale_color_discrete(guide = FALSE) + + ylim(c(0, 100)) + + labs(x = "", + y = "Pourcentage d'alertes validées \npar semaine", + title = paste0("Proportion d'alertes validées par semaine - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total} + +ggplot(x, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et \nzone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_validation, fig.keep = "all"} + +table_hz_total_validation <- x %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation ", + " et zone de santé - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_validation, fig.keep = "all"} + +table_hz_over_time_validation <- x %>% + count(epiweek_report, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_validation %>% + show_table() + +``` + + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent} + +ggplot(x_recent, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation", + "et \nzone de santé", + " depuis les 3 semaines dernières")) + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_validation, fig.keep = "all"} + +table_hz_total_recent_validation <- x_recent %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_validation %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation ", + "et zone de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_validation <- x_recent %>% + count(date, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_validation %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Areas with less than 100 alerts removed from graph but kept in table. + +```{r health_area_total, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(n >= 100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + +```{r table_ha_total_validation, fig.keep = "all"} + +table_ha_total_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = epiweek_report_label, fill = status)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_validation, fig.keep = "all"} + +table_ha_over_time_validation <- x %>% + count(epiweek_report, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_validation %>% + show_table() + +``` + + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Areas with less than 20 alerts removed from graph but kept in table. + +``` {r health_area_total_recent, fig.width = 14} + +x_validations <- x %>% + filter(date > start_date) %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(n >= 20) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + " depuis les 3 semaines dernières"), + subtitle = "avec plus de 20 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_validation, fig.keep = "all"} + +table_ha_total_recent_validation <- x %>% + filter(date > start_date) %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_validation %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent, fig.width = 12, fig.height = 8} + +x_validations <- x %>% + filter(date > start_date) %>% + filter(top_aires != "other") %>% + count(date, status, top_aires) + +ggplot(x_validations, aes(x = date, y = n, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_validation <- x %>% + filter(date > start_date) %>% + count(date, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_validation %>% + show_table() + +``` + + + + + +# Origins {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their origin + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + + +## Overall + +### Weekly, since database start + +```{r sous_coord_origins} + +ggplot(x, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = "Nombre d'alertes par origine et par semaine") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +```{r sous_coord_time_table_origins, fig.keep = "all"} + +table_origins <- x %>% + count(epiweek_report, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r origins_time_recent} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + rotate_x_text(45) + + large_txt + + theme(legend.position = "bottom", + panel.spacing.y = unit(1, "lines")) + + labs(title = "Nombre d'alertes par origine et par semaine", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_origins_recent, fig.keep = "all"} + +table_origins_recent <- x_recent %>% + count(date, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins_recent %>% + show_table() + +``` + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_origin} + +ggplot(x, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_origins, fig.keep = "all"} + +table_hz_total_origins <- x %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_origin, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine de validation ", + "et zone de santé - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_origins, fig.keep = "all"} + +table_hz_over_time_origins <- x %>% + count(epiweek_report, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_origins %>% + show_table() + +``` + + +## Health Zone - Past 3 Weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_origin_recent} + +ggplot(x_recent, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et zone de santé depuis\n", + "les 3 semaines dernières")) + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_origins, fig.keep = "all"} + +table_hz_total_recent_origins <- x_recent %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_origins %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine de validation ", + "et zone de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_origins <- x_recent %>% + count(date, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_origins %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Areas with less than 100 alerts removed from graph but kept in table. + +``` {r health_area_total_origins, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(n >= 100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_origins, fig.keep = "all"} + +table_ha_total_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_origin, fig.width = 12, fig.height = 8} + +x_origins <- x %>% + filter(top_aires != "other") + +ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " et zone de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_origins, fig.keep = "all"} + +table_ha_over_time_origins <- x %>% + count(epiweek_report, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_origins %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Areas with less than 20 alerts removed from graph but kept in table. + +``` {r health_area_total_recent_origin, fig.width = 14} + +x_origins <- x %>% + filter(date > start_date) %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(n >= 20) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + "\ndepuis les 3 semaines dernières"), + subtitle = "avec plus de 20 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - Total past 3 weeks + +```{r table_ha_total_recent_origins, fig.keep = "all"} + +table_ha_total_recent_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_origins %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} + +x_origins_recent <- x %>% + filter(top_aires != "other", + date > start_date) %>% + count(date, origin, top_aires) + +ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de santé - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_origins <- x %>% + filter(date > start_date) %>% + count(date, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_origins %>% + show_table() + +``` + + + + + +# Validation decisions {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section focuses on the decision of alerts, by health zones. + +When looking at decisions on the treatment of alerts, we identify the 4 +following situations: + +* **true positive**: alerts were rightfully + validated (patient tested) +* **true negative**: alerts were rightfully not + validated (patient not tested) +* **false positive**: alerts were wrongly + validated (patient tested, shoud not have been) +* **false negative**: alerts were wrongly not + validated (patient not tested, should have been) + +**False positive** create a waste of resources as well as un-necessary pressure +on the patient and community. **False negative** create a risk of missing cases. + +For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". + + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + +## Overall + +### Weekly, since database start + +```{r decisions} + +ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar(color = "white") + + scale_x_discrete(drop = FALSE) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + scale_decisions + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +``` {r table_decisions} + +table_decisions <- outcomes %>% + count(epiweek_report, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(c("row", "col")) %>% + mutate(prop_false_positive_95ci = + prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), + prop_false_negative_95ci = + prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), + prop_false_positive = prop_to_perc(false_positive / Total), + lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), + upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), + prop_false_negative = prop_to_perc(false_negative / Total), + lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), + upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) + +table_decisions %>% + select(-prop_false_positive, + -lower_false_positive, + -upper_false_positive, + -prop_false_negative , + -lower_false_negative, + -upper_false_negative + ) %>% + show_table() + +``` + + +### Table - incorrectly validated/not validated + +* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. +* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. + +```{r table_sens_spec} + +table_sens_spec <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + mutate(total_tested = true_positive + false_positive, + total_not_tested = true_negative + false_negative) %>% + mutate(incorrectly_validated_95ci = + prop_to_display_ci(false_positive, total_tested, + dec = 2, perc = TRUE), + incorrectly_not_validated_95ci = + prop_to_display_ci(false_negative, total_not_tested, + dec = 2, perc = TRUE)) +table_sens_spec %>% + show_table() + +``` + +`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. + +**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. + + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r decisions_time} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + rotate_x_text(45) + + large_txt + + scale_decisions + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + labs(title = paste("Nombre d'alertes par décision de validation"), + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Over time table + +``` {r table_decisions_recent} + +table_decisions_recent <- incidence(outcomes_recent$date, "day", + groups = outcomes_recent$decision_comparison) %>% + as.data.frame() %>% + adorn_totals(where = c("row", "col")) + +table_decisions_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_decision} + +ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision et zone de santé") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + rotate_x_text(45) + +``` + +### Table - Total since database start + +```{r table_hz_total_decisions, fig.keep = "all"} + +table_hz_total_decisions <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_decision, fig.width = 12, fig.height = 8} + +ggplot(outcomes, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_decisions, fig.keep = "all"} + +table_hz_over_time_decisions <- outcomes %>% + count(epiweek_report, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_decisions %>% + show_table() + +``` + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_decision_recent} + +ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision et zone de santé depuis", + "\nles 3 semaines dernières")) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_decisions, fig.keep = "all"} + +table_hz_total_recent_decisions <- outcomes_recent %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_decisions %>% + show_table() + +``` + +### Daily past 3 weeks + +```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_decisions %>% + show_table() + +``` + + + +## Health Area + +### Total since database start + +Only area with at least 100 alerts are presented in the graph all areas are presented in the table. + +``` {r health_area_total_decisions, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(n >= 100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_decision, fig.keep = "all"} + +table_ha_total_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_decision, fig.width = 12, fig.height = 8} + +x_decisions <- outcomes %>% + filter(top_aires != "other") + +ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de sante - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_decisions, fig.keep = "all"} + +table_ha_over_time_decisions <- outcomes %>% + count(epiweek_report, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_decisions %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Only area with atleast than 20 alerts are presented in the graph all areas are presented in the table. + +``` {r health_area_total_recent_decision, fig.width = 14} + +x_decisions <- outcomes %>% + filter(date > start_date) %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(n >= 20) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé", + "\ndepuis les 3 semaines dernières"), + subtitle = "avec plus de 20 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_decisions, fig.keep = "all"} + +table_ha_total_recent_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_decisions %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} + +x_decisions_recent <- outcomes %>% + filter(top_aires != "other", + date > start_date) + +ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_decisions} + +table_ha_over_time_recent_decisions <- outcomes %>% + filter(date > start_date) %>% + count(date, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_decisions %>% + show_table() + +``` + + + + + + +# Alert Rates {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. + +**Note:** Rate are only calculated for areas with population data. There +are `r length(non_pop_data)` Aire de Santes without population data, which totals + **`r alerts_no_pop`** alerts. + +**Note: All known Aire de Santes are reported in tables** + + + +```{r reactivate_alert_rates, include = FALSE} + +if(allNA(x$as_population)){ + knitr::opts_chunk$set(eval = FALSE) + no_pop_data <- TRUE + +} else{ + cat("There is no population data therefore alert rates can not be calculated.") +} + +``` + + + +## Overall + +### Weekly, Since Database Start + +```{r alert_rates} + +x_alert_rate_sous_co <- x %>% + count(epiweek_report_label, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, + aes(x = epiweek_report_label, y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes pour \n", + "la sous-coordination de Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + +### Table - Weekly Since Database Start + +```{r table_alert_rates_per_week, fig.keep = "all"} + +table_alert_rates_per_week <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = statut_inconnu + invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_per_week %>% + show_table() + +``` + + + +## Overall Past 3 Weeks + +### Daily, Past 3 Weeks + +``` {r alert_rates_recent} + +x_alert_rate_sous_co <- x_recent %>% + count(date, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes pour \n", + "la sous-coordination de Bunia")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - Weekly Since Database Start + +```{r table_alert_rates_recent, fig.keep = "all"} + +table_alert_rates_recent <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total, Since Database Start + +``` {r alert_rate_zone_de_sante_total_validation_status} + +date_span <- database_date - min(x$date, na.rm = TRUE) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(status, zone_de_sante, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(epiweek_report_label, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation, et zone de santé")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + + +## Health Zone Past 3 Weeks + +### Total, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} + +x_alert_rate_zone_sante <- x %>% + filter(date > start_date, + as_population > 0, + top_zones != "other") %>% + count(status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * 3)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé depuis le 3 semaines", + " \ndernières")) + + large_txt + + theme(legend.position = "bottom") + + +``` + +### Daily, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} + +x_alert_rate_zone_sante <- x %>% + filter(date > start_date, + zs_population > 0, + top_zones != "other") %>% + count(date, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + + + +## Health Area + +### Total, Since Database Start + +``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} + +date_span <- database_date - min(x$date) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, top_zones, aire_de_sante, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut ", + "de validation et aire de santé")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(top_aires != "other", + as_population > 0) %>% + count(epiweek_report_label, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / as_population) + +ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + +## Health Area - Past 3 Weeks + +``` {r alert_rate_as_3_weeks, fig.width = 14} + +x_alert_rate_aire_sante_recent <- x %>% + filter(top_zones != "other", + date > start_date, + as_population > 0) %>% + count(status, aire_de_sante, top_zones, as_population) %>% + mutate(alert_rate = (10000 * n) / (as_population * 3)) + +ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par 10000 \npersonnes et par semaine", + title = paste0("Nombre d'alertes par 10000 personnes et par semaine", + "\npour les 3 semaines dernières")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + +``` + + +### Daily, Past 3 Weeks + +``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(date > start_date, + top_aires != "other", + as_population > 0) %>% + count(date, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population)) + +ggplot(data = x_alert_rate_aire_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + + + + + + +# Final status of alerts {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section investigates information on the proportions of false positive and false negative that became real cases. + + +## Overall final status by status decision comparison + +###Absolute number of alerts by final status and status decision comparison + +```{r final_status} + +outcomes %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_x", space = "free")+ + scale_final_outcome + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \nrègle de décision et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision + +```{r final_status_percentage} + +perc_final_outcome <- outcomes %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + +perc_final_outcome %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +##Final status for the past 21 days + +###Absolute number of alerts by final status and status decision comparison for the past 21 days + +```{r final_status_recent} + + +outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +###Percentage of final status by alert status decision for the past 21 days + +```{r final_status_percentage_recent} + +perc_final_outcome_recent <- outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= suspect + non_cas + confirme, + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + + +perc_final_outcome_recent %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +### Table - final status overall + +```{r table_final_status, fig.keep = "all"} + +perc_final_outcome %>% + show_table() + +``` + + + +### Table - final status for the past 21 days + +```{r table_final_status_recent, fig.keep = "all"} + +perc_final_outcome_recent %>% + show_table() + +``` + + + + + +# Export data and tables {.tabset .tabset-fade .tabset-pills} + + +```{r reactivate_recent, include = FALSE} +knitr::opts_chunk$set(eval = TRUE) +``` + + +## Outline + +We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current +working directory. + + + + +## Export clean data + +We export some of the clean database, placed in `produced_rds/` as well as in +`data/clean/`: + +```{r export_rds, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +## create the text for the file name with the database date +rds_file_name <- sprintf("%sclean_%s.rds", + undated_file_name(current_bunia), + format(database_date, "%Y-%m-%d")) +rds_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_rds", rds_file_name)) + +``` + +We copy these files to the `data/clean` folder: + +```{r copy_rds, eval = TRUE} +# copy some files into `data/clean/` + +if (!dir.exists("data/clean")) { + dir.create("data/clean") +} + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + rds_file_name) +# Copy the rds data +file.copy(from = file.path("produced_rds", rds_file_name), + to = destination, + overwrite = TRUE) + +``` + + + + +## Excel files + +The following code exports all tables named in `to_report` to `xslx` files, +stored inside the folder `produced_xlsx`: + +### Cleaned alerts database + +```{r exports_tables, eval = TRUE} + +cleaned_alerts_database_bunia <- x + +to_export <- c("cleaned_alerts_database_bunia", + "table_validation_overall_time", + "table_validation_overall_past_3_weeks", + "table_hz_total_validation", + "table_hz_over_time_validation", + "table_hz_total_recent_validation", + "table_hz_over_time_recent_validation", + "table_ha_total_validation", + "table_ha_over_time_validation", + "table_ha_total_recent_validation", + "table_ha_over_time_recent_validation", + "table_origins", + "table_origins_recent", + "table_hz_total_origins", + "table_hz_over_time_origins", + "table_hz_total_recent_origins", + "table_hz_over_time_recent_origins", + "table_ha_total_origins", + "table_ha_over_time_origins", + "table_ha_total_recent_origins", + "table_ha_over_time_recent_origins", + "table_decisions", + "table_sens_spec", + "table_decisions_recent", + "table_hz_total_decisions", + "table_hz_over_time_decisions", + "table_hz_total_recent_decisions", + "table_hz_over_time_recent_decisions", + "table_ha_total_decisions", + "table_ha_over_time_decisions", + "table_ha_total_recent_decisions", + "table_ha_over_time_recent_decisions", + "perc_final_outcome", + "perc_final_outcome_recent", + "table_unknown_as") + +``` + +```{r xlsx_exports, eval = TRUE} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_xlsx", + paste0(e, ".xlsx"))) +} + +``` + + +We copy the main data file to the `data/clean` folder: + +```{r export_xlsx, eval = TRUE} + +## create the text for the file name with the database date +xlsx_file_name <- sprintf("%sclean_%s.xlsx", + undated_file_name(current_bunia), + format(database_date, "%Y-%m-%d")) +xlsx_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_xlsx", xlsx_file_name)) + +``` + + +```{r copy_xlsx, eval = TRUE} +# copy some files into `data/clean/` + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + xlsx_file_name) +# Copy the rds data +file.copy(from = file.path("produced_xlsx", xlsx_file_name), + to = destination, + overwrite = TRUE) + +``` + + + +Click on the following links to open the files (only works if the files above +have been generated and are in the same folder as this document): + + +```{r xlsx_links, results = "asis", eval = TRUE} + +for (e in to_export) { + txt <- sprintf("- [%s.xlsx](%s.xlsx)", + e, + file.path("produced_xlsx", + e)) + cat(txt, sep = "\n") +} + +``` + + + +## R objects + +The following code exports all tables named in `to_report` to `rds` files, +stored inside the folder `produced_rds`: + +```{r rds_exports, eval = TRUE} + +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_rds", + paste0(e, ".rds"))) +} + +``` + + + + + +# System information {.tabset .tabset-fade .tabset-pills} + + +## Outline + +The following information documents the system on which the document was +compiled. + + + +## System + +This provides information on the operating system. + +```{r system_info} +Sys.info() +``` + + +## R environment + +This provides information on the version of R used: + +```{r R_session} +R.version +``` + + + +## R packages + +This provides information on the packages used: + +```{r R_pkg} +sessionInfo() +``` + + +## Compilation parameters + +This shows which parameters were passed through `params` at compilation time: + +```{r params} +params +``` diff --git a/alerts/report_sources/alerts_butembo_2019-12-10.Rmd b/alerts/report_sources/alerts_butembo_2019-12-31.Rmd similarity index 96% rename from alerts/report_sources/alerts_butembo_2019-12-10.Rmd rename to alerts/report_sources/alerts_butembo_2019-12-31.Rmd index 7f91ade..050232d 100644 --- a/alerts/report_sources/alerts_butembo_2019-12-10.Rmd +++ b/alerts/report_sources/alerts_butembo_2019-12-31.Rmd @@ -1,2843 +1,2859 @@ ---- -title: "Investigation of alerts data: Butembo" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma and Aminata Ndiaye for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Butembo. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_butembo -x_raw <- custom_import(current_butembo) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_butembo) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zones_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(as)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = as.character(source_notif)) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% - mutate(status = conc_final) %>% - mutate(status = case_when(status == "validee" ~ "validee", - status == "invalidee" ~ "invalidee", - TRUE ~ "statut_inconnu")) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Butembo present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. - - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) - -## create a variable for the final status of the alert -x <- x %>% - mutate(final_outcome = class_final) %>% - mutate(final_outcome = factor(final_outcome, levels = c("confirme", - "non_cas", - "suspect"))) - -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -butembo_ha_pop <- pop_data$population -index <- which(is.na(butembo_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - naus, - diarr, - asth, - anor, - abdo, - thor, - musc, - arti, - ceph, - toux, - resp, - aval, - gor, - hoq, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(naus, - diarr, - asth, - anor, - abdo, - thor, - musc, - arti, - ceph, - toux, - resp, - aval, - gor, - hoq, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 21 -x_recent <- filter(x, date > start_date) -outcomes_recent <- filter(outcomes, date > start_date) - -``` - - - -## Alerts counts split - -Calculate number of alerts greater than or equal to 1000 or less than 1000 over all time. Greater than or less than 200 for recent. There are many aire de santes and this helps to restrict what can be seen. - -```{r alert_count_split} - -greater_1000 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 1000) %>% - pull(aire_de_sante) - - -less_1000 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 1000) %>% - pull(aire_de_sante) - - -less_1000[less_1000 %in% greater_1000] <- NA -less_1000 <- less_1000[complete.cases(less_1000)] - - -greater_200_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 200) %>% - pull(aire_de_sante) -less_200_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 200) %>% - pull(aire_de_sante) - -less_200_recent[less_200_recent %in% greater_200_recent] <- NA -less_200_recent <- less_200_recent[complete.cases(less_200_recent)] - - -``` - - - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "Validation correcte", - true_negative = "Invalidation correcte", - false_positive = "Fausse alerte", - false_negative = "Alerte manquée")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - - -scale_final_outcome <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(confirme = "#ff9999", - suspect = "#264d73", - non_cas = "#8cb3d9"), - labels = c(confirme = "Cas confirme", - suspect= "Cas suspect", - non_cas = "Non-cas" - - )) - -scale_final_outcome_perc <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(perc_confirme = "#ff9999", - perc_suspect = "#264d73", - perc_non_cas = "#8cb3d9"), - labels = c(perc_confirme = "Cas confirme", - perc_suspect= "Cas suspect", - perc_non_cas = "Non-cas" - - )) - - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Butembo")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Butembo")) + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées par semaine - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé - Butembo") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de santé - Butembo"), -subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 1000 alerts removed from graph but kept in table. - -```{r health_area_total_greater100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_1000) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Butembo", - subtitle = "avec plus de 1000 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_less100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_1000) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Butembo", - subtitle = "avec moins de 1000 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 200 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_greater20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_200_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", "\n(avec plus de 200 alertes) - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_recent_less20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_200_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", "\n(avec moins de 200 alertes) - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x_recent %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x_recent %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x_recent %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine - Butembo") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine - Butembo", - subtitle = "Données des 3 dernières semaines ", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et", "\nzone de santé - Butembo")) + - theme(legend.position = "bottom", , axis.text.x = element_text(size=17)) + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine ", - "et zone de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et", " \nzone de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom", , axis.text.x = element_text(size=17)) + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine ", - "et zone de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Areas with less than 1000 alerts removed from graph but kept in table. - -``` {r health_area_total_origins_greater100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_1000) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Butembo", - subtitle = "avec plus de 1000 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_origins_less100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_1000) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Butembo", - subtitle = "avec moins de 1000 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - " et aire de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size= 17)) + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Areas with less than 200 alerts removed from graph but kept in table. - -``` {r health_area_total_recent_origin_greater20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_200_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "\n(avec plus de 200 alertes) - Butembo"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - - -``` {r health_area_total_recent_origin__less20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_200_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "\n(avec moins de 200 alertes) - Butembo"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x_recent %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x_recent %>% - filter(top_aires != "other") %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - " et aire de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x_recent %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". - - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation - Butembo") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - labs(title = paste("Nombre d'alertes par décision de validation - Butembo"), - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé - Butembo") + - theme(legend.position = "bottom", axis.text.x = element_text(size=17)) + - guides(fill=guide_legend(ncol=2)) + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom", axis.text.x = element_text(size=17)) + - guides(fill=guide_legend(ncol=2)) - - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - guides(fill=guide_legend(ncol=2)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Only area with at least 1000 (not 100) alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_decisions_greater1000, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_1000) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Butembo", - subtitle = "avec plus de 1000 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -``` {r health_area_total_decisions_less1000, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_1000) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Butembo", - subtitle = "avec moins de 1000 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Only area with at least than 200 alerts are presented in the graph all areas are presented in the table. - -``` {r health_area_total_recent_decision_greater20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_200_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé - Butembo", - "\n(avec plus de 200 alertes)"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_recent_decision_less20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_200_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé - Butembo", - "\n(avec moins de 200 alertes)"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes_recent %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes_recent %>% - filter(top_aires != "other") - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Butembo"), -subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " \nde validation et aire de santé - Butembo")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x_recent %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " \nde validation et aire de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x_recent %>% - filter( - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " \nde validation et aire de santé - Butembo"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - - - -# Final status of alerts {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section investigates information on the proportions of false positive and false negative that became real cases. - - -## Overall final status by status decision comparison - -###Absolute number of alerts by final status and status decision comparison - -```{r final_status} - -outcomes %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_x", space = "free")+ - scale_final_outcome + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -###Percentage of final status by alert status decision - -```{r final_status_percentage} - -perc_final_outcome <- outcomes %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_suspect = prop_to_perc(suspect/total), - perc_non_cas = prop_to_perc(non_cas/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - -perc_final_outcome %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -##Final status for the past 21 days - -###Absolute number of alerts by final status and status decision comparison for the past 21 days - -```{r final_status_recent} - -outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -###Percentage of final status by alert status decision for the past 21 days - -```{r final_status_percentage_recent} - -perc_final_outcome_recent <- outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_suspect = prop_to_perc(suspect/total), - perc_non_cas = prop_to_perc(non_cas/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - - -perc_final_outcome_recent %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -### Table - final status overall - -```{r table_final_status, fig.keep = "all"} - -perc_final_outcome %>% - show_table() - -``` - - - -### Table - final status for the past 21 days - -```{r table_final_status_recent, fig.keep = "all"} - -perc_final_outcome_recent %>% - show_table() - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_butembo), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_butembo <- x - -to_export <- c("cleaned_alerts_database_butembo", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "perc_final_outcome", - "perc_final_outcome_recent", - "table_unknown_as") - -``` - - -```{r xlsx_exports, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_xlsx")) { - dir.create("produced_xlsx") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - - - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_butembo), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` +--- +title: "Investigation of alerts data: Butembo" +author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, Aminata Ndiaye, and Emma Glennon for the analytic cell OEC Goma" +date: "`r format(Sys.time(), '%A %d %B %Y')`" +output: + html_document: + code_folding: hide + highlight: zenburn + number_sections: yes + theme: spacelab + toc: yes + toc_collapse: no + toc_depth: 1 + toc_float: yes + css: !expr here::here('css', 'style.css') +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(echo = TRUE, + eval = TRUE, + collapse = TRUE, + fig.width = 8, + fig.height = 6, + dpi = 150, + warning = FALSE, + message = FALSE, + fig.path = "figures/") +``` + + +
+ +**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) + +**Code contributors:** Chris Jarvis, Charlie Whittaker + +**Data contributors:** Surveillance team + +**Version:** 1.0.0 + +**Reviewed by:** Thibaut Jombart + +**Notice**: this is a **stable, routine report**. **Do not touch it unless it is +broken.** To make a contribution, carefully read the +[README](../../../../../README.html) file. + + + + + + + + +# Data preparation {.tabset .tabset-fade .tabset-pills} + + + +## Outline + +This report cleans and analyses the alert data of Butembo. Input comes from an +`xlsx` file containing alerts in a specific format. Because all +sub-coordinations have different standards, each sub-coordination needs a +separate report. + +The data preparation involves the following steps, detailed in the following +tabs: + +* **Load scripts**: loads libraries and useful scripts used in the analyses; all + `.R` files contained in `scripts` at the root of the factory are automatically + loaded + +* **Load data**: imports datasets, and may contain some *ad hoc* changes to the +data such as specific data cleaning (not used in other reports), new variables +used in the analyses, etc. + +* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is + not used in other reports (otherwise cleaning should be done in a dedicated + report); this section is also used to create new variables used in the + analyses + +## Load scripts + +These scripts will load: + +* all local scripts, stored as `.R` filesinside `/scripts/` +* all Overall scripts, i.e. stored outside the factory in `../scripts/` + +```{r read_scripts} + +## read scripts +path_to_scripts <- here::here("scripts") +scripts_files <- dir(path_to_scripts, pattern = ".R$", + full.names = TRUE) +for (file in scripts_files) source(file, local = TRUE) + +ggthemr("fresh") + +``` + + + +## Load alerts data + +We extract the completion date from the file name: + +```{r load_alerts_data} + +## load the data +current_butembo +x_raw <- custom_import(current_butembo) +glimpse(x_raw) + +## extract database date from the file name +file_name <- gsub("^[^.]+/", "", current_butembo) +database_date <- file_name %>% + guess_dates() +database_date + +``` + +The **completion date** of the database is **`r format(database_date, format = +"%A %d %b %Y")`**. + + +## Clean data + +We use *linelist*'s function `clean_data()` to: + +- remove all special characters from the data +- set all characters to lower case +- replace all accentuated and diacritic characters with their closest ascii + match in the latin alphabet +- replace all separators with a single `_` +- replace all mis-spelling using a Overall dictionary (see the file + `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R + session, after compiling the document +- (optionally) detect date formats and convert data to dates, including cases + where format varies within a column + + +This cleaning is achieved with: + +```{r data_cleaning} + +x <- x_raw %>% + clean_data(guess_dates = FALSE, + wordlists = cleaning_rules) %>% + as_tibble() + +``` + + +## Renaming variables and sanity checks + +The following variables will be used, and are therefore checked: + +- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` +- sanitize `genre` +- make `origin` as a sanitized version of `source_notif` + +```{r check_variables} + +## date of notification +x <- x %>% mutate(date = guess_dates(date)) +range(x$date, na.rm = TRUE) + +# REVIEW: Remove NA dates +date_na <- sum(is.na(x$date)) +x <- x %>% filter(!is.na(date)) +# Check that na dates are removed + +## gender +table(x$sexe, useNA = "ifany") + +## contact connu +x <- mutate(x, contact_connu = lien) +table(x$contact_connu, useNA = "ifany") + +## zone de sante +x <- mutate(x, zone_de_sante = as.character(zones_sante)) +table(x$zone_de_sante, useNA = "ifany") + +## aire de sante +x <- mutate(x, aire_de_sante = as.character(as)) +table(x$aire_de_sante, useNA = "ifany") + +## origin / source_notif +x <- mutate(x, origin = as.character(source_notif)) +table(x$origin, useNA = "ifany") + +# create variable for alert validation +x <- x %>% + mutate(status = conc_final) %>% + mutate(status = case_when(status == "validee" ~ "validee", + status == "invalidee" ~ "invalidee", + TRUE ~ "statut_inconnu")) %>% + mutate(status = factor(status, levels = c("statut_inconnu", + "invalidee", + "validee"))) +table(x$status, useNA = "ifany") + +# another round of cleaning +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + + +There are `r length(date_na)` missing dates in the data. + + + +## Variable creation + +The following variables are created: + +- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert +was reported in. +- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de +Santes) are grouped into "other". +- `top_aires` - which describes the 14 most active Aire de Santes (out of all the +legitimate Aire de Santes belonging to Butembo present in the database). Note - All +other Aire de Santes are aggregated into the category "other". + +- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. + + +Note - All other Aire de Santes are aggregated into the category "other". + +``` {r variable_creation} +## epiweek report +x <- x %>% + mutate(epiweek_report = + aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% + mutate(epiweek_report_label = + aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) + + +x <- x %>% + mutate(top_zones = top_values(zone_de_sante, 5)) %>% + mutate(top_aires = top_values(aire_de_sante, 14)) + +## create a variable for the final status of the alert +x <- x %>% + mutate(final_outcome = class_final) %>% + mutate(final_outcome = factor(final_outcome, levels = c("confirme", + "non_cas", + "suspect"))) + +``` + +One last round of dictionary-based cleaning: + +```{r last_cleaning} + +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + +Remove data from before 2019-01-01 as not as reliable. + +```{r remove_pre_2019} + +x <- x %>% + filter(date >= "2019-01-01") + +``` + + +## Outcomes + +Outcome is defined for alerts which have either been validated or invalidated. + +```{r outcomes} + +## get only known outcomes +outcomes <- x %>% + filter(status %in% c("validee", "invalidee")) %>% + droplevels() %>% + mutate(validee = 1 * (status == "validee")) + +``` + + +## Check admin areas are correct + +```{r load_area_names_data} + +## load the data +current_spatial +area_names <- rio::import(current_spatial) %>% + clean_data(guess_dates = FALSE) + +as_names <- unique(area_names$as) +zs_names <- unique(area_names$zs) + +``` + +### Identify unknown areas + +```{r identify_area_names_data} + +## load the data +n_rows <- nrow(x) + +unknown_as <- x %>% + filter(!aire_de_sante %in% as_names) + +most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) + +start_date <- database_date - 20 +unknown_as_3weeks <- x %>% + filter(!aire_de_sante %in% as_names, + date >= start_date) + +``` + + +There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. + +```{r show_unknown_area_names} + +table_unknown_as <- unknown_as %>% + count(zone_de_sante, aire_de_sante) + +table_unknown_as %>% + show_table() + +``` + + +## Filter to known aire de santes + +```{r known_aire_de_sante} + +x <- x %>% + filter(aire_de_sante %in% as_names) + +``` + + + +## Add population data + +Load the population data + +```{r load_pop_data} + +## load the data +current_pop +pop_data <- rio::import(current_pop) +glimpse(pop_data) + +pop_data <- pop_data %>% + clean_data(guess_dates = FALSE) %>% + as_tibble() +``` + +### Check for area not in alerts + +```{r filter_by_alerts} + +## load the data +pop_data <- pop_data %>% + filter(aire_de_sante %in% x$aire_de_sante, + zone_de_sante %in% x$zone_de_sante) + +pop_data + +## aire de sante population +aire_de_sante_pop <- pop_data %>% + mutate(as_population = population) %>% + select(zone_de_sante, aire_de_sante, as_population) + +## zone de sante population +zone_de_sante_pop <- pop_data %>% + group_by(zone_de_sante) %>% + summarise(zs_population = sum(population, na.rm = TRUE)) + +total_population <- sum(unique(pop_data$population), na.rm = TRUE) + +## adding populations to main data +x <- x %>% + left_join(aire_de_sante_pop, + by = c("aire_de_sante", "zone_de_sante")) %>% + left_join(zone_de_sante_pop, + by = c("zone_de_sante")) + +if(!allNA(x$as_population)){ + x <- x %>% + mutate(total_population = total_population) +} + + +``` + + + +## Check population linkage (review regularly) + +```{r check_population_linkage} + +# Calculating number of individuals successfully linked to population data +number_individuals <- nrow(x) +number_individuals_aire_linked <- sum(!is.na(x$as_population)) +prop_linked <- sum(number_individuals_aire_linked / number_individuals) + +# Compare these two quantities - ensure there aren't any mispellings we're missing +table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching + +# Identifying Aire de Santes with missing population data +butembo_ha_pop <- pop_data$population +index <- which(is.na(butembo_ha_pop)) +non_pop_data <- pop_data$aire_de_sante[index] + +# Identifying number of alerts linked to Aire de Sante with +# missing population data +alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) + +``` + +There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. + + +## Expected decisions + +Alerts are supposed to be validated using the following key, depending on +whether the person has: + +1. **known contact** with a case and at least **one symptom** +2. if not 1, **unexplained bleeding** +3. if not 2, **fever and 3 other symptoms** + +Note that 3 de facto excludes bleeding as a candidate symptom. We create a new +variable which asserts these conditions: + +```{r expected_result} + +## elements of diagnostic +## criteria 1 +has_contact <- outcomes$contact_connu %in% c("confirme", "oui") +has_one_symptom <- outcomes %>% + select(fievre, + naus, + diarr, + asth, + anor, + abdo, + thor, + musc, + arti, + ceph, + toux, + resp, + aval, + gor, + hoq, + bleed) %>% + apply(1, function(e) any(e == "oui", na.rm = TRUE)) +fits_1 <- has_contact & has_one_symptom + +## criteria 2 +has_bleeding <- outcomes$bleed == "oui" +fits_2 <- !fits_1 & has_bleeding + +## criteria 3 +has_fever <- outcomes$fievre == "oui" +has_3_symptoms <- outcomes %>% + select(naus, + diarr, + asth, + anor, + abdo, + thor, + musc, + arti, + ceph, + toux, + resp, + aval, + gor, + hoq, + bleed) %>% + apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 +fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms + + +## make sure criteria are exclusive - no number in the table below should exceed +## 1. i.e this table should only zero or one as categories. +table(fits_1 + fits_2 + fits_3) + +## expected decisions +outcomes <- outcomes %>% + mutate(admit_contact = fits_1, + admit_bleeding = fits_2, + admit_fever = fits_3) %>% + mutate(expected_decision = ifelse( + fits_1 | fits_2 | fits_3, + "validee", + "invalidee"), + decision_comparison = case_when( + status == "validee" & expected_decision == "validee" ~ + "true_positive", + status == "invalidee" & expected_decision == "invalidee" ~ + "true_negative", + status == "validee" & expected_decision == "invalidee" ~ + "false_positive", + status == "invalidee" & expected_decision == "validee" ~ + "false_negative", + TRUE ~ NA_character_ + ), + decision_comparison = + factor(decision_comparison, + levels = c("true_positive", + "true_negative", + "false_positive", + "false_negative"))) + +## remove alerts with missing comparisons, from 2019 +outcomes <- outcomes %>% + filter(!is.na(decision_comparison), + date >= as.Date("2019-01-01")) + + +## check that classification is well-made +outcomes %>% + group_by(status, expected_decision, decision_comparison) %>% + count() + + +``` + + + +## Last 21 days + +We duplicate the previous datasets, retaining the 21 days leading up to the +current database date. + +```{r subset_21_days} + +start_date <- database_date - 21 +x_recent <- filter(x, date > start_date) +outcomes_recent <- filter(outcomes, date > start_date) + +``` + + + +## Alerts counts split + +Calculate number of alerts greater than or equal to 1000 or less than 1000 over all time. Greater than or less than 200 for recent. There are many aire de santes and this helps to restrict what can be seen. + +```{r alert_count_split} + +greater_1000 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 1000) %>% + pull(aire_de_sante) + + +less_1000 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 1000) %>% + pull(aire_de_sante) + + +less_1000[less_1000 %in% greater_1000] <- NA +less_1000 <- less_1000[complete.cases(less_1000)] + + +greater_200_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 200) %>% + pull(aire_de_sante) +less_200_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 200) %>% + pull(aire_de_sante) + +less_200_recent[less_200_recent %in% greater_200_recent] <- NA +less_200_recent <- less_200_recent[complete.cases(less_200_recent)] + + +``` + + + + +## Custom color scales + +We define custom colors for some of the variables used in the plots. + +```{r scales_fill} + +scale_origins <- scale_fill_manual( + "Origine", + values = c(communautaire = "#ffcc00", + recherche_active = "#c3c388", + surveillance_passive = "#ff6699", + point_entree = "#40bf80", + autre = "#668cff", + inconnu = "grey", + check_cleaning_rules = "grey")) + +scale_decisions <- scale_fill_manual( + "Décisions", + values = c(true_positive = "#94b8b8", + true_negative = "#8c8cd9", + false_positive = "#ff8080", + false_negative = "#b3003b"), + labels = c(true_positive = "Validation correcte", + true_negative = "Invalidation correcte", + false_positive = "Fausse alerte", + false_negative = "Alerte manquée")) + +scale_validations <- scale_fill_manual( + "Outcome", + values = c(statut_inconnu = "#BCB4A4", + validee = "#D56F3E", + invalidee = "#F2C69B"), + labels = c(validee = "Validée", + invalidee = "Invalidée", + statut_inconnu = "Statut Inconnu")) + + +scale_final_outcome <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(confirme = "#ff9999", + suspect = "#264d73", + non_cas = "#8cb3d9"), + labels = c(confirme = "Cas confirme", + suspect= "Cas suspect", + non_cas = "Non-cas" + + )) + +scale_final_outcome_perc <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(perc_confirme = "#ff9999", + perc_suspect = "#264d73", + perc_non_cas = "#8cb3d9"), + labels = c(perc_confirme = "Cas confirme", + perc_suspect= "Cas suspect", + perc_non_cas = "Non-cas" + + )) + + +``` + + + + + + +# Validation status {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + +## Overall + +### Weekly, since database start + +```{r sous_coord_time} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation - Butembo")) + + large_txt + + rotate_x_text(45) + + scale_months + + theme(legend.position = "bottom") + +``` + + +### Table - weekly since database start + +```{r sous_coord_time_table, fig.keep = "all"} + +table_validation_overall_time <- x %>% + count(epiweek_report, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_time %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r sous_coord_recent} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation - Butembo")) + + large_txt + + rotate_x_text(45) + + scale_weeks + + theme(legend.position = "bottom") + +``` + +### Table - daily past 3 weeks + +```{r sous_coord_recent_table, fig.keep = "all"} + +table_validation_overall_past_3_weeks <- x_recent %>% + count(date, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_past_3_weeks %>% + show_table() + +``` + + +## Overall proportion validated + +```{r sous_co_proportion_validated} + +x_prop <- x %>% + count(epiweek_report_label, status) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + p = prop_to_perc(validee / total), + lci = prop_ci(validee, total, "lower", TRUE), + uci = prop_ci(validee, total, "upper", TRUE)) %>% + select(epiweek_report_label, p, lci, uci) + +ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + + geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + + geom_point(aes(y = p), size = 2) + + geom_line(aes(y = p), size = 1) + + scale_color_discrete(guide = FALSE) + + ylim(c(0, 100)) + + labs(x = "", + y = "Pourcentage d'alertes validées \npar semaine", + title = paste0("Proportion d'alertes validées par semaine - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total} + +ggplot(x, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et \nzone de santé - Butembo") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_validation, fig.keep = "all"} + +table_hz_total_validation <- x %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation ", + " et zone de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_validation, fig.keep = "all"} + +table_hz_over_time_validation <- x %>% + count(epiweek_report, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_validation %>% + show_table() + +``` + + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent} + +ggplot(x_recent, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation", + "et \nzone de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_validation, fig.keep = "all"} + +table_hz_total_recent_validation <- x_recent %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_validation %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation ", + "et zone de santé - Butembo"), +subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_validation <- x_recent %>% + count(date, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_validation %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Areas with less than 1000 alerts removed from graph but kept in table. + +```{r health_area_total_greater100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_1000) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Butembo", + subtitle = "avec plus de 1000 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_less100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_1000) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Butembo", + subtitle = "avec moins de 1000 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + +```{r table_ha_total_validation, fig.keep = "all"} + +table_ha_total_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = epiweek_report_label, fill = status)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_validation, fig.keep = "all"} + +table_ha_over_time_validation <- x %>% + count(epiweek_report, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_validation %>% + show_table() + +``` + + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Areas with less than 200 alerts removed from graph but kept in table. + +``` {r health_area_total_recent_greater20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_200_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", "\n(avec plus de 200 alertes) - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_recent_less20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_200_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", "\n(avec moins de 200 alertes) - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_validation, fig.keep = "all"} + +table_ha_total_recent_validation <- x_recent %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_validation %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent, fig.width = 12, fig.height = 8} + +x_validations <- x_recent %>% + filter(top_aires != "other") %>% + count(date, status, top_aires) + +ggplot(x_validations, aes(x = date, y = n, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_validation <- x_recent %>% + count(date, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_validation %>% + show_table() + +``` + + + + + +# Origins {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their origin + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + + +## Overall + +### Weekly, since database start + +```{r sous_coord_origins} + +ggplot(x, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = "Nombre d'alertes par origine - Butembo") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +```{r sous_coord_time_table_origins, fig.keep = "all"} + +table_origins <- x %>% + count(epiweek_report, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r origins_time_recent} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + rotate_x_text(45) + + large_txt + + theme(legend.position = "bottom", + panel.spacing.y = unit(1, "lines")) + + labs(title = "Nombre d'alertes par origine - Butembo", + subtitle = "Données des 3 dernières semaines ", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_origins_recent, fig.keep = "all"} + +table_origins_recent <- x_recent %>% + count(date, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins_recent %>% + show_table() + +``` + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_origin} + +ggplot(x, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et", "\nzone de santé - Butembo")) + + theme(legend.position = "bottom", , axis.text.x = element_text(size=17)) + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_origins, fig.keep = "all"} + +table_hz_total_origins <- x %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_origin, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine ", + "et zone de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_origins, fig.keep = "all"} + +table_hz_over_time_origins <- x %>% + count(epiweek_report, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_origins %>% + show_table() + +``` + + +## Health Zone - Past 3 Weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_origin_recent} + +ggplot(x_recent, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et", " \nzone de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom", , axis.text.x = element_text(size=17)) + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_origins, fig.keep = "all"} + +table_hz_total_recent_origins <- x_recent %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_origins %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine ", + "et zone de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_origins <- x_recent %>% + count(date, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_origins %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Areas with less than 1000 alerts removed from graph but kept in table. + +``` {r health_area_total_origins_greater100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_1000) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Butembo", + subtitle = "avec plus de 1000 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_origins_less100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_1000) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Butembo", + subtitle = "avec moins de 1000 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_origins, fig.keep = "all"} + +table_ha_total_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_origin, fig.width = 12, fig.height = 8} + +x_origins <- x %>% + filter(top_aires != "other") + +ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + " et aire de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size= 17)) + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_origins, fig.keep = "all"} + +table_ha_over_time_origins <- x %>% + count(epiweek_report, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_origins %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Areas with less than 200 alerts removed from graph but kept in table. + +``` {r health_area_total_recent_origin_greater20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_200_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "\n(avec plus de 200 alertes) - Butembo"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + + +``` {r health_area_total_recent_origin__less20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_200_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "\n(avec moins de 200 alertes) - Butembo"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - Total past 3 weeks + +```{r table_ha_total_recent_origins, fig.keep = "all"} + +table_ha_total_recent_origins <- x_recent %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_origins %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} + +x_origins_recent <- x_recent %>% + filter(top_aires != "other") %>% + count(date, origin, top_aires) + +ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + " et aire de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_origins <- x_recent %>% + count(date, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_origins %>% + show_table() + +``` + + + + + +# Validation decisions {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section focuses on the decision of alerts, by health zones. + +When looking at decisions on the treatment of alerts, we identify the 4 +following situations: + +* **true positive**: alerts were rightfully + validated (patient tested) +* **true negative**: alerts were rightfully not + validated (patient not tested) +* **false positive**: alerts were wrongly + validated (patient tested, shoud not have been) +* **false negative**: alerts were wrongly not + validated (patient not tested, should have been) + +**False positive** create a waste of resources as well as un-necessary pressure +on the patient and community. **False negative** create a risk of missing cases. + +For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". + + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + +## Overall + +### Weekly, since database start + +```{r decisions} + +ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar(color = "white") + + scale_x_discrete(drop = FALSE) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation - Butembo") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + scale_decisions + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +``` {r table_decisions} + +table_decisions <- outcomes %>% + count(epiweek_report, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(c("row", "col")) %>% + mutate(prop_false_positive_95ci = + prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), + prop_false_negative_95ci = + prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), + prop_false_positive = prop_to_perc(false_positive / Total), + lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), + upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), + prop_false_negative = prop_to_perc(false_negative / Total), + lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), + upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) + +table_decisions %>% + select(-prop_false_positive, + -lower_false_positive, + -upper_false_positive, + -prop_false_negative , + -lower_false_negative, + -upper_false_negative + ) %>% + show_table() + +``` + + +### Table - incorrectly validated/not validated + +* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. +* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. + +```{r table_sens_spec} + +table_sens_spec <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + mutate(total_tested = true_positive + false_positive, + total_not_tested = true_negative + false_negative) %>% + mutate(incorrectly_validated_95ci = + prop_to_display_ci(false_positive, total_tested, + dec = 2, perc = TRUE), + incorrectly_not_validated_95ci = + prop_to_display_ci(false_negative, total_not_tested, + dec = 2, perc = TRUE)) +table_sens_spec %>% + show_table() + +``` + +`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. + +**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. + + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r decisions_time} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + rotate_x_text(45) + + large_txt + + scale_decisions + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + labs(title = paste("Nombre d'alertes par décision de validation - Butembo"), + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Over time table + +``` {r table_decisions_recent} + +table_decisions_recent <- incidence(outcomes_recent$date, "day", + groups = outcomes_recent$decision_comparison) %>% + as.data.frame() %>% + adorn_totals(where = c("row", "col")) + +table_decisions_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_decision} + +ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision et zone de santé - Butembo") + + theme(legend.position = "bottom", axis.text.x = element_text(size=17)) + + guides(fill=guide_legend(ncol=2)) + + rotate_x_text(45) + +``` + +### Table - Total since database start + +```{r table_hz_total_decisions, fig.keep = "all"} + +table_hz_total_decisions <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_decision, fig.width = 12, fig.height = 8} + +ggplot(outcomes, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_decisions, fig.keep = "all"} + +table_hz_over_time_decisions <- outcomes %>% + count(epiweek_report, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_decisions %>% + show_table() + +``` + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_decision_recent} + +ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision et zone de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom", axis.text.x = element_text(size=17)) + + guides(fill=guide_legend(ncol=2)) + + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_decisions, fig.keep = "all"} + +table_hz_total_recent_decisions <- outcomes_recent %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_decisions %>% + show_table() + +``` + +### Daily past 3 weeks + +```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + guides(fill=guide_legend(ncol=2)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_decisions %>% + show_table() + +``` + + + +## Health Area + +### Total since database start + +Only area with at least 1000 (not 100) alerts are presented in the graph all areas are presented in the table. + +``` {r health_area_total_decisions_greater1000, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_1000) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Butembo", + subtitle = "avec plus de 1000 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +``` {r health_area_total_decisions_less1000, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_1000) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Butembo", + subtitle = "avec moins de 1000 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_decision, fig.keep = "all"} + +table_ha_total_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_decision, fig.width = 12, fig.height = 8} + +x_decisions <- outcomes %>% + filter(top_aires != "other") + +ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_decisions, fig.keep = "all"} + +table_ha_over_time_decisions <- outcomes %>% + count(epiweek_report, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_decisions %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Only area with at least than 200 alerts are presented in the graph all areas are presented in the table. + +``` {r health_area_total_recent_decision_greater20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_200_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé - Butembo", + "\n(avec plus de 200 alertes)"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_recent_decision_less20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_200_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé - Butembo", + "\n(avec moins de 200 alertes)"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total past 3 weeks + +```{r table_ha_total_recent_decisions, fig.keep = "all"} + +table_ha_total_recent_decisions <- outcomes_recent %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_decisions %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} + +x_decisions_recent <- outcomes_recent %>% + filter(top_aires != "other") + +ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Butembo"), +subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_decisions} + +table_ha_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_decisions %>% + show_table() + +``` + + + + + + +# Alert Rates {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. + +**Note:** Rate are only calculated for areas with population data. There +are `r length(non_pop_data)` Aire de Santes without population data, which totals + **`r alerts_no_pop`** alerts. + +**Note: All known Aire de Santes are reported in tables** + + + +```{r reactivate_alert_rates, include = FALSE} + +if(allNA(x$as_population)){ + knitr::opts_chunk$set(eval = FALSE) + no_pop_data <- TRUE + +} else{ + cat("There is no population data therefore alert rates can not be calculated.") +} + +``` + + + +## Overall + +### Weekly, Since Database Start + +```{r alert_rates} + +x_alert_rate_sous_co <- x %>% + count(epiweek_report_label, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, + aes(x = epiweek_report_label, y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + +### Table - Weekly Since Database Start + +```{r table_alert_rates_per_week, fig.keep = "all"} + +table_alert_rates_per_week <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = statut_inconnu + invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_per_week %>% + show_table() + +``` + + + +## Overall Past 3 Weeks + +### Daily, Past 3 Weeks + +``` {r alert_rates_recent} + +x_alert_rate_sous_co <- x_recent %>% + count(date, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - Weekly Since Database Start + +```{r table_alert_rates_recent, fig.keep = "all"} + +table_alert_rates_recent <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = statut_inconnu + invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total, Since Database Start + +``` {r alert_rate_zone_de_sante_total_validation_status} + +date_span <- database_date - min(x$date, na.rm = TRUE) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(status, zone_de_sante, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(epiweek_report_label, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation, et zone de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + + +## Health Zone Past 3 Weeks + +### Total, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + as_population > 0, + top_zones != "other") %>% + count(status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * 3)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + theme(legend.position = "bottom") + + +``` + +### Daily, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + zs_population > 0, + top_zones != "other") %>% + count(date, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_weeks + +``` + + + +## Health Area + +### Total, Since Database Start + +``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} + +date_span <- database_date - min(x$date) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, top_zones, aire_de_sante, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut ", + "de validation et aire de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(top_aires != "other", + as_population > 0) %>% + count(epiweek_report_label, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / as_population) + +ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " \nde validation et aire de santé - Butembo")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + +## Health Area - Past 3 Weeks + +``` {r alert_rate_as_3_weeks, fig.width = 14} + +x_alert_rate_aire_sante_recent <- x_recent %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, aire_de_sante, top_zones, as_population) %>% + mutate(alert_rate = (10000 * n) / (as_population * 3)) + +ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par 10000 \npersonnes et par semaine", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " \nde validation et aire de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + +``` + + +### Daily, Past 3 Weeks + +``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x_recent %>% + filter( + top_aires != "other", + as_population > 0) %>% + count(date, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population)) + +ggplot(data = x_alert_rate_aire_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " \nde validation et aire de santé - Butembo"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + + + + + + +# Final status of alerts {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section investigates information on the proportions of false positive and false negative that became real cases. + + +## Overall final status by status decision comparison + +###Absolute number of alerts by final status and status decision comparison + +```{r final_status} + +outcomes %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_x", space = "free")+ + scale_final_outcome + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision + +```{r final_status_percentage} + +perc_final_outcome <- outcomes %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_suspect = prop_to_perc(suspect/total), + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + +perc_final_outcome %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +##Final status for the past 21 days + +###Absolute number of alerts by final status and status decision comparison for the past 21 days + +```{r final_status_recent} + +outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +###Percentage of final status by alert status decision for the past 21 days + +```{r final_status_percentage_recent} + +perc_final_outcome_recent <- outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_suspect = prop_to_perc(suspect/total), + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + + +perc_final_outcome_recent %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +### Table - final status overall + +```{r table_final_status, fig.keep = "all"} + +perc_final_outcome %>% + show_table() + +``` + + + +### Table - final status for the past 21 days + +```{r table_final_status_recent, fig.keep = "all"} + +perc_final_outcome_recent %>% + show_table() + +``` + + + + + +# Export data and tables {.tabset .tabset-fade .tabset-pills} + + +```{r reactivate_recent, include = FALSE} +knitr::opts_chunk$set(eval = TRUE) +``` + + +## Outline + +We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current +working directory. + + + + +## Export clean data + +We export some of the clean database, placed in `produced_rds/` as well as in +`data/clean/`: + +```{r export_rds, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +## create the text for the file name with the database date +rds_file_name <- sprintf("%sclean_%s.rds", + undated_file_name(current_butembo), + format(database_date, "%Y-%m-%d")) +rds_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_rds", rds_file_name)) + +``` + +We copy these files to the `data/clean` folder: + +```{r copy_rds, eval = TRUE} +# copy some files into `data/clean/` + +if (!dir.exists("data/clean")) { + dir.create("data/clean") +} + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + rds_file_name) +# Copy the rds data +file.copy(from = file.path("produced_rds", rds_file_name), + to = destination, + overwrite = TRUE) + +``` + + + + +## Excel files + +The following code exports all tables named in `to_report` to `xslx` files, +stored inside the folder `produced_xlsx`: + +### Cleaned alerts database + +```{r exports_tables, eval = TRUE} + +cleaned_alerts_database_butembo <- x + +to_export <- c("cleaned_alerts_database_butembo", + "table_validation_overall_time", + "table_validation_overall_past_3_weeks", + "table_hz_total_validation", + "table_hz_over_time_validation", + "table_hz_total_recent_validation", + "table_hz_over_time_recent_validation", + "table_ha_total_validation", + "table_ha_over_time_validation", + "table_ha_total_recent_validation", + "table_ha_over_time_recent_validation", + "table_origins", + "table_origins_recent", + "table_hz_total_origins", + "table_hz_over_time_origins", + "table_hz_total_recent_origins", + "table_hz_over_time_recent_origins", + "table_ha_total_origins", + "table_ha_over_time_origins", + "table_ha_total_recent_origins", + "table_ha_over_time_recent_origins", + "table_decisions", + "table_sens_spec", + "table_decisions_recent", + "table_hz_total_decisions", + "table_hz_over_time_decisions", + "table_hz_total_recent_decisions", + "table_hz_over_time_recent_decisions", + "table_ha_total_decisions", + "table_ha_over_time_decisions", + "table_ha_total_recent_decisions", + "table_ha_over_time_recent_decisions", + "perc_final_outcome", + "perc_final_outcome_recent", + "table_unknown_as") + +``` + + +```{r xlsx_exports, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_xlsx")) { + dir.create("produced_xlsx") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_xlsx", + paste0(e, ".xlsx"))) +} + +``` + +We copy the main data file to the `data/clean` folder: + +```{r export_xlsx, eval = TRUE} + + + +## create the text for the file name with the database date +xlsx_file_name <- sprintf("%sclean_%s.xlsx", + undated_file_name(current_butembo), + format(database_date, "%Y-%m-%d")) +xlsx_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_xlsx", xlsx_file_name)) + +``` + + +```{r copy_xlsx, eval = TRUE} +# copy some files into `data/clean/` + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + xlsx_file_name) +# Copy the rds data +file.copy(from = file.path("produced_xlsx", xlsx_file_name), + to = destination, + overwrite = TRUE) + +``` + + + +Click on the following links to open the files (only works if the files above +have been generated and are in the same folder as this document): + + +```{r xlsx_links, results = "asis", eval = TRUE} + +for (e in to_export) { + txt <- sprintf("- [%s.xlsx](%s.xlsx)", + e, + file.path("produced_xlsx", + e)) + cat(txt, sep = "\n") +} + +``` + + + +## R objects + +The following code exports all tables named in `to_report` to `rds` files, +stored inside the folder `produced_rds`: + +```{r rds_exports, eval = TRUE} + +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_rds", + paste0(e, ".rds"))) +} + +``` + + + + + +# System information {.tabset .tabset-fade .tabset-pills} + + +## Outline + +The following information documents the system on which the document was +compiled. + + + +## System + +This provides information on the operating system. + +```{r system_info} +Sys.info() +``` + + +## R environment + +This provides information on the version of R used: + +```{r R_session} +R.version +``` + + + +## R packages + +This provides information on the packages used: + +```{r R_pkg} +sessionInfo() +``` + + +## Compilation parameters + +This shows which parameters were passed through `params` at compilation time: + +```{r params} +params +``` diff --git a/alerts/report_sources/alerts_goma_2019-12-10.Rmd b/alerts/report_sources/alerts_goma_2019-12-31.Rmd similarity index 96% rename from alerts/report_sources/alerts_goma_2019-12-10.Rmd rename to alerts/report_sources/alerts_goma_2019-12-31.Rmd index 1835566..2e5fb5c 100644 --- a/alerts/report_sources/alerts_goma_2019-12-10.Rmd +++ b/alerts/report_sources/alerts_goma_2019-12-31.Rmd @@ -1,2844 +1,2860 @@ ---- -title: "Investigation of alerts data: Goma" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, and Aminata Ndiaye for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Goma. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_goma -x_raw <- custom_import(current_goma) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_goma) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(aire_sante)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = type_surveillance) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Goma present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -Note - All other Aire de Santes are aggregated into the category "other". - -- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) - -#create a variable for the final status of the alert -x <- x %>% - mutate(final_outcome = classif_final) %>% - mutate(final_outcome = factor(final_outcome, levels = c("confirme", - "non_cas", - "suspect"))) - -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -goma_ha_pop <- pop_data$population -index <- which(is.na(goma_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 21 -x_recent <- filter(x, date > start_date) -outcomes_recent <- filter(outcomes, date > start_date) - -``` - - - -## Alerts counts split - -Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. - -```{r alert_count_split} - -greater_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 100) %>% - pull(aire_de_sante) - -less_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 100) %>% - pull(aire_de_sante) - -less_100[less_100 %in% greater_100] <- NA -less_100 <- less_100[complete.cases(less_100)] - - -greater_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 20) %>% - pull(aire_de_sante) -less_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 20) %>% - pull(aire_de_sante) - -less_20_recent[less_20_recent %in% greater_20_recent] <- NA -less_20_recent <- less_20_recent[complete.cases(less_20_recent)] - - -``` - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "Validation correcte", - true_negative = "Invalidation correcte", - false_positive = "Fausse alerte", - false_negative = "Alerte manquée")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - -scale_final_outcome <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(confirme = "#ff9999", - suspect = "#264d73", - non_cas = "#8cb3d9"), - labels = c(confirme = "Cas confirme", - suspect= "Cas suspect", - non_cas = "Non-cas" - - )) - -scale_final_outcome_perc <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(perc_confirme = "#ff9999", - perc_suspect = "#264d73", - perc_non_cas = "#8cb3d9"), - labels = c(perc_confirme = "Cas confirme", - perc_suspect= "Cas suspect", - perc_non_cas = "Non-cas" - - )) - - -``` - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Goma")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé - Goma") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom", strip.text = element_text(size=17)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table - -```{r health_area_total_greater100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = TRUE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Goma", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_less100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Goma", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " \net aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table - -```{r health_area_total_recent_greater20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\n(avec plus de 20 alertes) - Goma"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_recent_less20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\n(avec moins de 20 alertes) - Goma"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x_recent %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x_recent %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x_recent %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine - Goma") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine - Goma", - subtitle = "Données des 3 dernières semaines ", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé - Goma") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - "et zone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_origins_greter100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Goma", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_origins_less100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Goma", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - " et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -``` {r health_area_total_recent_origin_greater20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "\n(avec plus de 20 alertes) - Goma"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_recent_origin_less20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "\n(avec moins de 20 alertes) - Goma"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x_recent %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin, fig.width = 12, fig.height = 8} - -x_origins_recent <- x_recent %>% - filter(top_aires != "other") %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - " et aire de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x_recent %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". - - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation - Goma") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - labs(title = paste("Nombre d'alertes par décision de validation - Goma"), - subtitle = "Données des 3 dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation", "\n et zone de santé - Goma")) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation ", " \net zone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_decisionsgreater100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation", " \net aire de santé - Goma"), - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_decisionsless100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation", " \net aire de santé - Goma"), - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -```{r health_area_total_recent_decision_greater20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation", " \net aire de santé (avec plus de 20 alertes) - Goma"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_recent_decision_less20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation", " \net aire de santé (avec moins de 20 alertes) - Goma"), - subtitle = "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes_recent %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes_recent %>% - filter(top_aires != "other", - date > start_date) - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -check_iconnu <- max(x_alert_rate_sous_co$status == "statut_inconnu") - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = ifelse(check_iconnu, - statut_inconnu + invalidee + validee, - invalidee + validee), - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -check_iconnu <- max(x_alert_rate_sous_co$status == "statut_inconnu") - - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = ifelse(check_iconnu, - statut_inconnu + invalidee + validee, - invalidee + validee), - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x %>% - filter( - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x %>% - filter(date > start_date, - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé - Goma")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x_recent %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x_recent %>% - filter( - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé - Goma"), - subtitle = "Données des 3 dernières semaines ") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - - - -# Final status of alerts {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section investigates information on the proportions of false positive and false negative that became real cases. - - -## Overall final status by status decision comparison - -###Absolute number of alerts by final status and status decision comparison - -```{r final_status} - -outcomes %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_x", space = "free")+ - scale_final_outcome + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -###Percentage of final status by alert status decision - -```{r final_status_percentage} - -perc_final_outcome <- outcomes %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas, - perc_confirme = prop_to_perc(confirme/total), - perc_non_cas = prop_to_perc(non_cas/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - -perc_final_outcome %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -##Final status for the past 21 days - -###Absolute number of alerts by final status and status decision comparison for the past 21 days - -```{r final_status_recent} - -outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -###Percentage of final status by alert status decision for the past 21 days - -```{r final_status_percentage_recent} - -perc_final_outcome_recent <- outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= non_cas , - perc_non_cas = prop_to_perc(non_cas/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - - -perc_final_outcome_recent %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -### Table - final status overall - -```{r table_final_status, fig.keep = "all"} - -perc_final_outcome %>% - show_table() - -``` - - - -### Table - final status for the past 21 days - -```{r table_final_status_recent, fig.keep = "all"} - -perc_final_outcome_recent %>% - show_table() - -``` - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_goma <- x - -to_export <- c("cleaned_alerts_database_goma", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "perc_final_outcome", - "perc_final_outcome_recent", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_xlsx")) { - dir.create("produced_xlsx") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_goma), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` +--- +title: "Investigation of alerts data: Goma" +author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, Aminata Ndiaye, and Emma Glennon for the analytic cell OEC Goma" +date: "`r format(Sys.time(), '%A %d %B %Y')`" +output: + html_document: + code_folding: hide + highlight: zenburn + number_sections: yes + theme: spacelab + toc: yes + toc_collapse: no + toc_depth: 1 + toc_float: yes + css: !expr here::here('css', 'style.css') +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(echo = TRUE, + eval = TRUE, + collapse = TRUE, + fig.width = 8, + fig.height = 6, + dpi = 150, + warning = FALSE, + message = FALSE, + fig.path = "figures/") +``` + + +
+ +**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) + +**Code contributors:** Chris Jarvis, Charlie Whittaker + +**Data contributors:** Surveillance team + +**Version:** 1.0.0 + +**Reviewed by:** Thibaut Jombart + +**Notice**: this is a **stable, routine report**. **Do not touch it unless it is +broken.** To make a contribution, carefully read the +[README](../../../../../README.html) file. + + + + + + + + +# Data preparation {.tabset .tabset-fade .tabset-pills} + + + +## Outline + +This report cleans and analyses the alert data of Goma. Input comes from an +`xlsx` file containing alerts in a specific format. Because all +sub-coordinations have different standards, each sub-coordination needs a +separate report. + +The data preparation involves the following steps, detailed in the following +tabs: + +* **Load scripts**: loads libraries and useful scripts used in the analyses; all + `.R` files contained in `scripts` at the root of the factory are automatically + loaded + +* **Load data**: imports datasets, and may contain some *ad hoc* changes to the +data such as specific data cleaning (not used in other reports), new variables +used in the analyses, etc. + +* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is + not used in other reports (otherwise cleaning should be done in a dedicated + report); this section is also used to create new variables used in the + analyses + +## Load scripts + +These scripts will load: + +* all local scripts, stored as `.R` filesinside `/scripts/` +* all Overall scripts, i.e. stored outside the factory in `../scripts/` + +```{r read_scripts} + +## read scripts +path_to_scripts <- here::here("scripts") +scripts_files <- dir(path_to_scripts, pattern = ".R$", + full.names = TRUE) +for (file in scripts_files) source(file, local = TRUE) + +ggthemr("fresh") + +``` + + + +## Load alerts data + +We extract the completion date from the file name: + +```{r load_alerts_data} + +## load the data +current_goma +x_raw <- custom_import(current_goma) +glimpse(x_raw) + +## extract database date from the file name +file_name <- gsub("^[^.]+/", "", current_goma) +database_date <- file_name %>% + guess_dates() +database_date + +``` + +The **completion date** of the database is **`r format(database_date, format = +"%A %d %b %Y")`**. + + +## Clean data + +We use *linelist*'s function `clean_data()` to: + +- remove all special characters from the data +- set all characters to lower case +- replace all accentuated and diacritic characters with their closest ascii + match in the latin alphabet +- replace all separators with a single `_` +- replace all mis-spelling using a Overall dictionary (see the file + `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R + session, after compiling the document +- (optionally) detect date formats and convert data to dates, including cases + where format varies within a column + + +This cleaning is achieved with: + +```{r data_cleaning} + +x <- x_raw %>% + clean_data(guess_dates = FALSE, + wordlists = cleaning_rules) %>% + as_tibble() + +``` + + +## Renaming variables and sanity checks + +The following variables will be used, and are therefore checked: + +- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` +- sanitize `genre` +- make `origin` as a sanitized version of `source_notif` + +```{r check_variables} + +## date of notification +x <- x %>% mutate(date = guess_dates(date)) +range(x$date, na.rm = TRUE) + +# REVIEW: Remove NA dates +date_na <- sum(is.na(x$date)) +x <- x %>% filter(!is.na(date)) +# Check that na dates are removed + +## gender +table(x$sexe, useNA = "ifany") + +## contact connu +x <- mutate(x, contact_connu = lien_epi) +table(x$contact_connu, useNA = "ifany") + +## zone de sante +x <- mutate(x, zone_de_sante = as.character(zone_sante)) +table(x$zone_de_sante, useNA = "ifany") + +## aire de sante +x <- mutate(x, aire_de_sante = as.character(aire_sante)) +table(x$aire_de_sante, useNA = "ifany") + +## origin / source_notif +x <- mutate(x, origin = type_surveillance) +table(x$origin, useNA = "ifany") + +# create variable for alert validation +x <- x %>% mutate( + status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% + mutate(status = factor(status, levels = c("statut_inconnu", + "invalidee", + "validee"))) +table(x$status, useNA = "ifany") + +# another round of cleaning +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + + +There are `r length(date_na)` missing dates in the data. + + + +## Variable creation + +The following variables are created: + +- `bleed` - which describes whether the alert displayed any of the bleeding related +symptoms. +- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert +was reported in. +- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de +Santes) are grouped into "other". +- `top_aires` - which describes the 14 most active Aire de Santes (out of all the +legitimate Aire de Santes belonging to Goma present in the database). Note - All +other Aire de Santes are aggregated into the category "other". + +Note - All other Aire de Santes are aggregated into the category "other". + +- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. + +``` {r variable_creation} + +## bleed +bleed <- x %>% + select(contains("saignement")) %>% + apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) +x <- mutate(x, bleed = bleed) +table(x$bleed, useNA = "ifany") + +## epiweek report +x <- x %>% + mutate(epiweek_report = + aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% + mutate(epiweek_report_label = + aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) + + +x <- x %>% + mutate(top_zones = top_values(zone_de_sante, 5)) %>% + mutate(top_aires = top_values(aire_de_sante, 14)) + +#create a variable for the final status of the alert +x <- x %>% + mutate(final_outcome = classif_final) %>% + mutate(final_outcome = factor(final_outcome, levels = c("confirme", + "non_cas", + "suspect"))) + +``` + +One last round of dictionary-based cleaning: + +```{r last_cleaning} + +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + +Remove data from before 2019-01-01 as not as reliable. + +```{r remove_pre_2019} + +x <- x %>% + filter(date >= "2019-01-01") + +``` + + +## Outcomes + +Outcome is defined for alerts which have either been validated or invalidated. + +```{r outcomes} + +## get only known outcomes +outcomes <- x %>% + filter(status %in% c("validee", "invalidee")) %>% + droplevels() %>% + mutate(validee = 1 * (status == "validee")) + +``` + + +## Check admin areas are correct + +```{r load_area_names_data} + +## load the data +current_spatial +area_names <- rio::import(current_spatial) %>% + clean_data(guess_dates = FALSE) + +as_names <- unique(area_names$as) +zs_names <- unique(area_names$zs) + +``` + +### Identify unknown areas + +```{r identify_area_names_data} + +## load the data +n_rows <- nrow(x) + +unknown_as <- x %>% + filter(!aire_de_sante %in% as_names) + +most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) + +start_date <- database_date - 20 +unknown_as_3weeks <- x %>% + filter(!aire_de_sante %in% as_names, + date >= start_date) + +``` + + +There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. + +```{r show_unknown_area_names} + +table_unknown_as <- unknown_as %>% + count(zone_de_sante, aire_de_sante) + +table_unknown_as %>% + show_table() + +``` + + +## Filter to known aire de santes + +```{r known_aire_de_sante} + +x <- x %>% + filter(aire_de_sante %in% as_names) + +``` + + + +## Add population data + +Load the population data + +```{r load_pop_data} + +## load the data +current_pop +pop_data <- rio::import(current_pop) +glimpse(pop_data) + +pop_data <- pop_data %>% + clean_data(guess_dates = FALSE) %>% + as_tibble() +``` + +### Check for area not in alerts + +```{r filter_by_alerts} + +## load the data +pop_data <- pop_data %>% + filter(aire_de_sante %in% x$aire_de_sante, + zone_de_sante %in% x$zone_de_sante) + +pop_data + +## aire de sante population +aire_de_sante_pop <- pop_data %>% + mutate(as_population = population) %>% + select(zone_de_sante, aire_de_sante, as_population) + +## zone de sante population +zone_de_sante_pop <- pop_data %>% + group_by(zone_de_sante) %>% + summarise(zs_population = sum(population, na.rm = TRUE)) + +total_population <- sum(unique(pop_data$population), na.rm = TRUE) + +## adding populations to main data +x <- x %>% + left_join(aire_de_sante_pop, + by = c("aire_de_sante", "zone_de_sante")) %>% + left_join(zone_de_sante_pop, + by = c("zone_de_sante")) + +if(!allNA(x$as_population)){ + x <- x %>% + mutate(total_population = total_population) +} + + +``` + + + +## Check population linkage (review regularly) + +```{r check_population_linkage} + +# Calculating number of individuals successfully linked to population data +number_individuals <- nrow(x) +number_individuals_aire_linked <- sum(!is.na(x$as_population)) +prop_linked <- sum(number_individuals_aire_linked / number_individuals) + +# Compare these two quantities - ensure there aren't any mispellings we're missing +table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching + +# Identifying Aire de Santes with missing population data +goma_ha_pop <- pop_data$population +index <- which(is.na(goma_ha_pop)) +non_pop_data <- pop_data$aire_de_sante[index] + +# Identifying number of alerts linked to Aire de Sante with +# missing population data +alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) + +``` + +There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. + + +## Expected decisions + +Alerts are supposed to be validated using the following key, depending on +whether the person has: + +1. **known contact** with a case and at least **one symptom** +2. if not 1, **unexplained bleeding** +3. if not 2, **fever and 3 other symptoms** + +Note that 3 de facto excludes bleeding as a candidate symptom. We create a new +variable which asserts these conditions: + +```{r expected_result} + +## elements of diagnostic +## criteria 1 +has_contact <- outcomes$contact_connu %in% c("confirme", "oui") +has_one_symptom <- outcomes %>% + select(fievre, + nausee_vom, + diarrhee, + fatigue, + anorexie, + dlr_abdo, + dlr_thorax, + dlr_muscu, + dlr_artic, + cephalee, + toux, + dyspnee, + dysphagie, + odynophagie, + hoquet, + bleed) %>% + apply(1, function(e) any(e == "oui", na.rm = TRUE)) +fits_1 <- has_contact & has_one_symptom + +## criteria 2 +has_bleeding <- outcomes$bleed == "oui" +fits_2 <- !fits_1 & has_bleeding + +## criteria 3 +has_fever <- outcomes$fievre == "oui" +has_3_symptoms <- outcomes %>% + select(nausee_vom, + diarrhee, + fatigue, + anorexie, + dlr_abdo, + dlr_thorax, + dlr_muscu, + dlr_artic, + cephalee, + toux, + dyspnee, + dysphagie, + odynophagie, + hoquet, + bleed) %>% + apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 +fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms + + +## make sure criteria are exclusive - no number in the table below should exceed +## 1. i.e this table should only zero or one as categories. +table(fits_1 + fits_2 + fits_3) + +## expected decisions +outcomes <- outcomes %>% + mutate(admit_contact = fits_1, + admit_bleeding = fits_2, + admit_fever = fits_3) %>% + mutate(expected_decision = ifelse( + fits_1 | fits_2 | fits_3, + "validee", + "invalidee"), + decision_comparison = case_when( + status == "validee" & expected_decision == "validee" ~ + "true_positive", + status == "invalidee" & expected_decision == "invalidee" ~ + "true_negative", + status == "validee" & expected_decision == "invalidee" ~ + "false_positive", + status == "invalidee" & expected_decision == "validee" ~ + "false_negative", + TRUE ~ NA_character_ + ), + decision_comparison = + factor(decision_comparison, + levels = c("true_positive", + "true_negative", + "false_positive", + "false_negative"))) + +## remove alerts with missing comparisons, from 2019 +outcomes <- outcomes %>% + filter(!is.na(decision_comparison), + date >= as.Date("2019-01-01")) + + +## check that classification is well-made +outcomes %>% + group_by(status, expected_decision, decision_comparison) %>% + count() + + +``` + + + +## Last 21 days + +We duplicate the previous datasets, retaining the 21 days leading up to the +current database date. + +```{r subset_21_days} + +start_date <- database_date - 21 +x_recent <- filter(x, date > start_date) +outcomes_recent <- filter(outcomes, date > start_date) + +``` + + + +## Alerts counts split + +Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. + +```{r alert_count_split} + +greater_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 100) %>% + pull(aire_de_sante) + +less_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 100) %>% + pull(aire_de_sante) + +less_100[less_100 %in% greater_100] <- NA +less_100 <- less_100[complete.cases(less_100)] + + +greater_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 20) %>% + pull(aire_de_sante) +less_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 20) %>% + pull(aire_de_sante) + +less_20_recent[less_20_recent %in% greater_20_recent] <- NA +less_20_recent <- less_20_recent[complete.cases(less_20_recent)] + + +``` + + +## Custom color scales + +We define custom colors for some of the variables used in the plots. + +```{r scales_fill} + +scale_origins <- scale_fill_manual( + "Origine", + values = c(communautaire = "#ffcc00", + recherche_active = "#c3c388", + surveillance_passive = "#ff6699", + point_entree = "#40bf80", + autre = "#668cff", + inconnu = "grey", + check_cleaning_rules = "grey")) + +scale_decisions <- scale_fill_manual( + "Décisions", + values = c(true_positive = "#94b8b8", + true_negative = "#8c8cd9", + false_positive = "#ff8080", + false_negative = "#b3003b"), + labels = c(true_positive = "Validation correcte", + true_negative = "Invalidation correcte", + false_positive = "Fausse alerte", + false_negative = "Alerte manquée")) + +scale_validations <- scale_fill_manual( + "Outcome", + values = c(statut_inconnu = "#BCB4A4", + validee = "#D56F3E", + invalidee = "#F2C69B"), + labels = c(validee = "Validée", + invalidee = "Invalidée", + statut_inconnu = "Statut Inconnu")) + +scale_final_outcome <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(confirme = "#ff9999", + suspect = "#264d73", + non_cas = "#8cb3d9"), + labels = c(confirme = "Cas confirme", + suspect= "Cas suspect", + non_cas = "Non-cas" + + )) + +scale_final_outcome_perc <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(perc_confirme = "#ff9999", + perc_suspect = "#264d73", + perc_non_cas = "#8cb3d9"), + labels = c(perc_confirme = "Cas confirme", + perc_suspect= "Cas suspect", + perc_non_cas = "Non-cas" + + )) + + +``` + + +# Validation status {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + +## Overall + +### Weekly, since database start + +```{r sous_coord_time} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation - Goma")) + + large_txt + + rotate_x_text(45) + + scale_months + + theme(legend.position = "bottom") + +``` + + +### Table - weekly since database start + +```{r sous_coord_time_table, fig.keep = "all"} + +table_validation_overall_time <- x %>% + count(epiweek_report, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_time %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r sous_coord_recent} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + scale_weeks + + theme(legend.position = "bottom") + +``` + +### Table - daily past 3 weeks + +```{r sous_coord_recent_table, fig.keep = "all"} + +table_validation_overall_past_3_weeks <- x_recent %>% + count(date, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_past_3_weeks %>% + show_table() + +``` + + +## Overall proportion validated + +```{r sous_co_proportion_validated} + +x_prop <- x %>% + count(epiweek_report_label, status) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + p = prop_to_perc(validee / total), + lci = prop_ci(validee, total, "lower", TRUE), + uci = prop_ci(validee, total, "upper", TRUE)) %>% + select(epiweek_report_label, p, lci, uci) + +ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + + geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + + geom_point(aes(y = p), size = 2) + + geom_line(aes(y = p), size = 1) + + scale_color_discrete(guide = FALSE) + + ylim(c(0, 100)) + + labs(x = "", + y = "Pourcentage d'alertes validées \npar semaine", + title = paste0("Proportion d'alertes validées - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total} + +ggplot(x, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et \nzone de santé - Goma") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_validation, fig.keep = "all"} + +table_hz_total_validation <- x %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation ", + " et zone de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_validation, fig.keep = "all"} + +table_hz_over_time_validation <- x %>% + count(epiweek_report, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_validation %>% + show_table() + +``` + + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent} + +ggplot(x_recent, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation", + "et \nzone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_validation, fig.keep = "all"} + +table_hz_total_recent_validation <- x_recent %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_validation %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation ", + "et zone de sante - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom", strip.text = element_text(size=17)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_validation <- x_recent %>% + count(date, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_validation %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table + +```{r health_area_total_greater100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = TRUE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Goma", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_less100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Goma", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + +```{r table_ha_total_validation, fig.keep = "all"} + +table_ha_total_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = epiweek_report_label, fill = status)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " \net aire de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_validation, fig.keep = "all"} + +table_ha_over_time_validation <- x %>% + count(epiweek_report, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_validation %>% + show_table() + +``` + + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table + +```{r health_area_total_recent_greater20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + "\n(avec plus de 20 alertes) - Goma"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_recent_less20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + "\n(avec moins de 20 alertes) - Goma"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_validation, fig.keep = "all"} + +table_ha_total_recent_validation <- x_recent %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_validation %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent, fig.width = 12, fig.height = 8} + +x_validations <- x_recent %>% + filter(top_aires != "other") %>% + count(date, status, top_aires) + +ggplot(x_validations, aes(x = date, y = n, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_validation <- x_recent %>% + count(date, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_validation %>% + show_table() + +``` + + + + + +# Origins {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their origin + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + + +## Overall + +### Weekly, since database start + +```{r sous_coord_origins} + +ggplot(x, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = "Nombre d'alertes par origine - Goma") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +```{r sous_coord_time_table_origins, fig.keep = "all"} + +table_origins <- x %>% + count(epiweek_report, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r origins_time_recent} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + rotate_x_text(45) + + large_txt + + theme(legend.position = "bottom", + panel.spacing.y = unit(1, "lines")) + + labs(title = "Nombre d'alertes par origine - Goma", + subtitle = "Données des 3 dernières semaines ", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_origins_recent, fig.keep = "all"} + +table_origins_recent <- x_recent %>% + count(date, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins_recent %>% + show_table() + +``` + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_origin} + +ggplot(x, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et zone de santé - Goma") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_origins, fig.keep = "all"} + +table_hz_total_origins <- x %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_origin, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine de validation ", + "et zone de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_origins, fig.keep = "all"} + +table_hz_over_time_origins <- x %>% + count(epiweek_report, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_origins %>% + show_table() + +``` + + +## Health Zone - Past 3 Weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_origin_recent} + +ggplot(x_recent, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et zone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_origins, fig.keep = "all"} + +table_hz_total_recent_origins <- x_recent %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_origins %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + "et zone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_origins <- x_recent %>% + count(date, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_origins %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_origins_greter100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Goma", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_origins_less100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Goma", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_origins, fig.keep = "all"} + +table_ha_total_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_origin, fig.width = 12, fig.height = 8} + +x_origins <- x %>% + filter(top_aires != "other") + +ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + " et zone de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_origins, fig.keep = "all"} + +table_ha_over_time_origins <- x %>% + count(epiweek_report, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_origins %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +``` {r health_area_total_recent_origin_greater20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "\n(avec plus de 20 alertes) - Goma"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_recent_origin_less20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "\n(avec moins de 20 alertes) - Goma"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - Total past 3 weeks + +```{r table_ha_total_recent_origins, fig.keep = "all"} + +table_ha_total_recent_origins <- x_recent %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_origins %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_origin, fig.width = 12, fig.height = 8} + +x_origins_recent <- x_recent %>% + filter(top_aires != "other") %>% + count(date, origin, top_aires) + +ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + " et aire de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_origins <- x_recent %>% + count(date, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_origins %>% + show_table() + +``` + + + + + +# Validation decisions {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section focuses on the decision of alerts, by health zones. + +When looking at decisions on the treatment of alerts, we identify the 4 +following situations: + +* **true positive**: alerts were rightfully + validated (patient tested) +* **true negative**: alerts were rightfully not + validated (patient not tested) +* **false positive**: alerts were wrongly + validated (patient tested, shoud not have been) +* **false negative**: alerts were wrongly not + validated (patient not tested, should have been) + +**False positive** create a waste of resources as well as un-necessary pressure +on the patient and community. **False negative** create a risk of missing cases. + +For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". + + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + +## Overall + +### Weekly, since database start + +```{r decisions} + +ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar(color = "white") + + scale_x_discrete(drop = FALSE) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation - Goma") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + scale_decisions + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +``` {r table_decisions} + +table_decisions <- outcomes %>% + count(epiweek_report, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(c("row", "col")) %>% + mutate(prop_false_positive_95ci = + prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), + prop_false_negative_95ci = + prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), + prop_false_positive = prop_to_perc(false_positive / Total), + lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), + upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), + prop_false_negative = prop_to_perc(false_negative / Total), + lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), + upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) + +table_decisions %>% + select(-prop_false_positive, + -lower_false_positive, + -upper_false_positive, + -prop_false_negative , + -lower_false_negative, + -upper_false_negative + ) %>% + show_table() + +``` + + +### Table - incorrectly validated/not validated + +* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. +* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. + +```{r table_sens_spec} + +table_sens_spec <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + mutate(total_tested = true_positive + false_positive, + total_not_tested = true_negative + false_negative) %>% + mutate(incorrectly_validated_95ci = + prop_to_display_ci(false_positive, total_tested, + dec = 2, perc = TRUE), + incorrectly_not_validated_95ci = + prop_to_display_ci(false_negative, total_not_tested, + dec = 2, perc = TRUE)) +table_sens_spec %>% + show_table() + +``` + +`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. + +**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. + + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r decisions_time} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + rotate_x_text(45) + + large_txt + + scale_decisions + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + labs(title = paste("Nombre d'alertes par décision de validation - Goma"), + subtitle = "Données des 3 dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Over time table + +``` {r table_decisions_recent} + +table_decisions_recent <- incidence(outcomes_recent$date, "day", + groups = outcomes_recent$decision_comparison) %>% + as.data.frame() %>% + adorn_totals(where = c("row", "col")) + +table_decisions_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_decision} + +ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation", "\n et zone de santé - Goma")) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + rotate_x_text(45) + +``` + +### Table - Total since database start + +```{r table_hz_total_decisions, fig.keep = "all"} + +table_hz_total_decisions <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_decision, fig.width = 12, fig.height = 8} + +ggplot(outcomes, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_decisions, fig.keep = "all"} + +table_hz_over_time_decisions <- outcomes %>% + count(epiweek_report, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_decisions %>% + show_table() + +``` + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_decision} + +ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation ", " \net zone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_decisions, fig.keep = "all"} + +table_hz_total_recent_decisions <- outcomes_recent %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_decisions %>% + show_table() + +``` + +### Daily past 3 weeks + +```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_decisions %>% + show_table() + +``` + + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_decisionsgreater100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation", " \net aire de santé - Goma"), + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_decisionsless100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation", " \net aire de santé - Goma"), + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_decision, fig.keep = "all"} + +table_ha_total_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_decision, fig.width = 12, fig.height = 8} + +x_decisions <- outcomes %>% + filter(top_aires != "other") + +ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_decisions, fig.keep = "all"} + +table_ha_over_time_decisions <- outcomes %>% + count(epiweek_report, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_decisions %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +```{r health_area_total_recent_decision_greater20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation", " \net aire de santé (avec plus de 20 alertes) - Goma"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_recent_decision_less20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation", " \net aire de santé (avec moins de 20 alertes) - Goma"), + subtitle = "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_decisions, fig.keep = "all"} + +table_ha_total_recent_decisions <- outcomes_recent %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_decisions %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} + +x_decisions_recent <- outcomes_recent %>% + filter(top_aires != "other", + date > start_date) + +ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_decisions} + +table_ha_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_decisions %>% + show_table() + +``` + + + + + + +# Alert Rates {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. + +**Note:** Rate are only calculated for areas with population data. There +are `r length(non_pop_data)` Aire de Santes without population data, which totals + **`r alerts_no_pop`** alerts. + +**Note: All known Aire de Santes are reported in tables** + + + +```{r reactivate_alert_rates, include = FALSE} + +if(allNA(x$as_population)){ + knitr::opts_chunk$set(eval = FALSE) + no_pop_data <- TRUE + cat("There is no population data therefore alert rates can not be calculated.") +} + +``` + + + +## Overall + +### Weekly, Since Database Start + +```{r alert_rates} + +x_alert_rate_sous_co <- x %>% + count(epiweek_report_label, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, + aes(x = epiweek_report_label, y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + +### Table - Weekly Since Database Start + +```{r table_alert_rates_per_week, fig.keep = "all"} + +check_iconnu <- max(x_alert_rate_sous_co$status == "statut_inconnu") + +table_alert_rates_per_week <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = ifelse(check_iconnu, + statut_inconnu + invalidee + validee, + invalidee + validee), + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_per_week %>% + show_table() + +``` + + + +## Overall Past 3 Weeks + +### Daily, Past 3 Weeks + +``` {r alert_rates_recent} + +x_alert_rate_sous_co <- x_recent %>% + count(date, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - Weekly Since Database Start + +```{r table_alert_rates_recent, fig.keep = "all"} + +check_iconnu <- max(x_alert_rate_sous_co$status == "statut_inconnu") + + +table_alert_rates_recent <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = ifelse(check_iconnu, + statut_inconnu + invalidee + validee, + invalidee + validee), + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total, Since Database Start + +``` {r alert_rate_zone_de_sante_total_validation_status} + +date_span <- database_date - min(x$date, na.rm = TRUE) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(status, zone_de_sante, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(epiweek_report_label, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation, et zone de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + + +## Health Zone Past 3 Weeks + +### Total, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} + +x_alert_rate_zone_sante <- x %>% + filter( + as_population > 0, + top_zones != "other") %>% + count(status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * 3)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + theme(legend.position = "bottom") + + +``` + +### Daily, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} + +x_alert_rate_zone_sante <- x %>% + filter(date > start_date, + zs_population > 0, + top_zones != "other") %>% + count(date, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + + + +## Health Area + +### Total, Since Database Start + +``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} + +date_span <- database_date - min(x$date) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, top_zones, aire_de_sante, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut ", + "de validation et aire de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(top_aires != "other", + as_population > 0) %>% + count(epiweek_report_label, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / as_population) + +ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé - Goma")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + +## Health Area - Past 3 Weeks + +``` {r alert_rate_as_3_weeks, fig.width = 14} + +x_alert_rate_aire_sante_recent <- x_recent %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, aire_de_sante, top_zones, as_population) %>% + mutate(alert_rate = (10000 * n) / (as_population * 3)) + +ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par 10000 \npersonnes et par semaine", + title = paste0("Nombre d'alertes par 10000 personnes - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + +``` + + +### Daily, Past 3 Weeks + +``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x_recent %>% + filter( + top_aires != "other", + as_population > 0) %>% + count(date, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population)) + +ggplot(data = x_alert_rate_aire_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé - Goma"), + subtitle = "Données des 3 dernières semaines ") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + + + + + + +# Final status of alerts {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section investigates information on the proportions of false positive and false negative that became real cases. + + +## Overall final status by status decision comparison + +###Absolute number of alerts by final status and status decision comparison + +```{r final_status} + +outcomes %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_x", space = "free")+ + scale_final_outcome + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision + +```{r final_status_percentage} + +perc_final_outcome <- outcomes %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + +perc_final_outcome %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +##Final status for the past 21 days + +###Absolute number of alerts by final status and status decision comparison for the past 21 days + +```{r final_status_recent} + +outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +###Percentage of final status by alert status decision for the past 21 days + +```{r final_status_percentage_recent} + +perc_final_outcome_recent <- outcomes_recent %>% + filter(!is.na(final_outcome))%>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= non_cas + suspect + confirme, + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + + +perc_final_outcome_recent %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +### Table - final status overall + +```{r table_final_status, fig.keep = "all"} + +perc_final_outcome %>% + show_table() + +``` + + + +### Table - final status for the past 21 days + +```{r table_final_status_recent, fig.keep = "all"} + +perc_final_outcome_recent %>% + show_table() + +``` + + + + +# Export data and tables {.tabset .tabset-fade .tabset-pills} + + +```{r reactivate_recent, include = FALSE} +knitr::opts_chunk$set(eval = TRUE) +``` + + +## Outline + +We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current +working directory. + + + + +## Export clean data + +We export some of the clean database, placed in `produced_rds/` as well as in +`data/clean/`: + +```{r export_rds, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +## create the text for the file name with the database date +rds_file_name <- sprintf("%sclean_%s.rds", + undated_file_name(current_goma), + format(database_date, "%Y-%m-%d")) +rds_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_rds", rds_file_name)) + +``` + +We copy these files to the `data/clean` folder: + +```{r copy_rds, eval = TRUE} +# copy some files into `data/clean/` + +if (!dir.exists("data/clean")) { + dir.create("data/clean") +} + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + rds_file_name) +# Copy the rds data +file.copy(from = file.path("produced_rds", rds_file_name), + to = destination, + overwrite = TRUE) + +``` + + + + +## Excel files + +The following code exports all tables named in `to_report` to `xslx` files, +stored inside the folder `produced_xlsx`: + +### Cleaned alerts database + +```{r exports_tables, eval = TRUE} + +cleaned_alerts_database_goma <- x + +to_export <- c("cleaned_alerts_database_goma", + "table_validation_overall_time", + "table_validation_overall_past_3_weeks", + "table_hz_total_validation", + "table_hz_over_time_validation", + "table_hz_total_recent_validation", + "table_hz_over_time_recent_validation", + "table_ha_total_validation", + "table_ha_over_time_validation", + "table_ha_total_recent_validation", + "table_ha_over_time_recent_validation", + "table_origins", + "table_origins_recent", + "table_hz_total_origins", + "table_hz_over_time_origins", + "table_hz_total_recent_origins", + "table_hz_over_time_recent_origins", + "table_ha_total_origins", + "table_ha_over_time_origins", + "table_ha_total_recent_origins", + "table_ha_over_time_recent_origins", + "table_decisions", + "table_sens_spec", + "table_decisions_recent", + "table_hz_total_decisions", + "table_hz_over_time_decisions", + "table_hz_total_recent_decisions", + "table_hz_over_time_recent_decisions", + "table_ha_total_decisions", + "table_ha_over_time_decisions", + "table_ha_total_recent_decisions", + "table_ha_over_time_recent_decisions", + "perc_final_outcome", + "perc_final_outcome_recent", + "table_unknown_as") + +``` + +```{r xlsx_exports, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_xlsx")) { + dir.create("produced_xlsx") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_xlsx", + paste0(e, ".xlsx"))) +} + +``` + + +We copy the main data file to the `data/clean` folder: + +```{r export_xlsx, eval = TRUE} + +## create the text for the file name with the database date +xlsx_file_name <- sprintf("%sclean_%s.xlsx", + undated_file_name(current_goma), + format(database_date, "%Y-%m-%d")) +xlsx_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_xlsx", xlsx_file_name)) + +``` + + +```{r copy_xlsx, eval = TRUE} +# copy some files into `data/clean/` + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + xlsx_file_name) +# Copy the rds data +file.copy(from = file.path("produced_xlsx", xlsx_file_name), + to = destination, + overwrite = TRUE) + +``` + + + +Click on the following links to open the files (only works if the files above +have been generated and are in the same folder as this document): + + +```{r xlsx_links, results = "asis", eval = TRUE} + +for (e in to_export) { + txt <- sprintf("- [%s.xlsx](%s.xlsx)", + e, + file.path("produced_xlsx", + e)) + cat(txt, sep = "\n") +} + +``` + + + +## R objects + +The following code exports all tables named in `to_report` to `rds` files, +stored inside the folder `produced_rds`: + +```{r rds_exports, eval = TRUE} + +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_rds", + paste0(e, ".rds"))) +} + +``` + + + + + +# System information {.tabset .tabset-fade .tabset-pills} + + +## Outline + +The following information documents the system on which the document was +compiled. + + + +## System + +This provides information on the operating system. + +```{r system_info} +Sys.info() +``` + + +## R environment + +This provides information on the version of R used: + +```{r R_session} +R.version +``` + + + +## R packages + +This provides information on the packages used: + +```{r R_pkg} +sessionInfo() +``` + + +## Compilation parameters + +This shows which parameters were passed through `params` at compilation time: + +```{r params} +params +``` diff --git a/alerts/report_sources/alerts_mambasa_2019-12-10.Rmd b/alerts/report_sources/alerts_mambasa_2019-12-31.Rmd similarity index 96% rename from alerts/report_sources/alerts_mambasa_2019-12-10.Rmd rename to alerts/report_sources/alerts_mambasa_2019-12-31.Rmd index cbda932..e6e426f 100644 --- a/alerts/report_sources/alerts_mambasa_2019-12-10.Rmd +++ b/alerts/report_sources/alerts_mambasa_2019-12-31.Rmd @@ -1,2810 +1,2872 @@ ---- -title: "Investigation of alerts data: Mambasa" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma and Aminata Ndiaye for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Mambasa. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_mambasa -x_raw <- custom_import(current_mambasa) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_mambasa) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien_epi) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zone_de_sante)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(as)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = moyen_de_transmission) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Mambasa present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) - - -#create a variable for the final status of the alert -x <- x %>% - mutate(final_outcome = classif_final) %>% - mutate(final_outcome = factor(final_outcome, levels = c("confirme", - "non_cas", - "suspect"))) - -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -mambasa_ha_pop <- pop_data$population -index <- which(is.na(mambasa_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(nausee_vom, - diarrhee, - fatigue, - anorexie, - dlr_abdo, - dlr_thorax, - dlr_muscu, - dlr_artic, - cephalee, - toux, - dyspnee, - dysphagie, - odynophagie, - hoquet, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 21 -x_recent <- filter(x, date > start_date) -outcomes_recent <- filter(outcomes, date > start_date) - -``` - - -## Alerts counts split - -Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. - -```{r alert_count_split} - -greater_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 100) %>% - pull(aire_de_sante) - -less_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 100) %>% - pull(aire_de_sante) - -less_100[less_100 %in% greater_100] <- NA -less_100 <- less_100[complete.cases(less_100)] - -greater_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 20) %>% - pull(aire_de_sante) -less_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 20) %>% - pull(aire_de_sante) - -less_20_recent[less_20_recent %in% greater_20_recent] <- NA -less_20_recent <- less_20_recent[complete.cases(less_20_recent)] - -``` - - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "Validation correcte", - true_negative = "Invalidation correcte", - false_positive = "Fausse alerte", - false_negative = "Alerte manquée")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - - -scale_final_outcome <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(confirme = "#ff9999", - suspect = "#264d73", - non_cas = "#8cb3d9"), - labels = c(confirme = "Cas confirme", - suspect= "Cas suspect", - non_cas = "Non-cas" - - )) - -scale_final_outcome_perc <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(perc_confirme = "#ff9999", - perc_suspect = "#264d73", - perc_non_cas = "#8cb3d9"), - labels = c(perc_confirme = "Cas confirme", - perc_suspect= "Cas suspect", - perc_non_cas = "Non-cas" - - )) - - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Mambasa")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé - Mambasa") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de sante - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table - -```{r health_area_total_greater100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Mambasa", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_less100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Mambasa", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table - -```{r health_area_total_recent_greater20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " (avec plus de 20 alertes) - Mambasa"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_recent_less20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - " (avec moins de 20 alertes) - Mambasa"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x_recent %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x_recent %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine - Mambasa") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine - Mambasa", - subtitle = "Données des trois dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé - Mambasa") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - "et zone de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et", " \nzone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - "et zone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_origins_greter100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Mambasa", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_origins_less100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Mambasa", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - " et zone de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -``` {r health_area_total_recent_origin_greater20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "(avec plus de 20 alertes) - Mambasa"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_recent_origin_less20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "(avec moins de 20 alertes) - Mambasa"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x_recent %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x_recent %>% - filter(top_aires != "other") %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - " et aire de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x_recent %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation - Mambasa") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - labs(title = paste("Nombre d'alertes par décision de validation - Mambasa"), - subtitle = "Données des trois dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé - Mambasa") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) - - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_decisionsgreater100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Mambasa", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_decisionsless100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Mambasa", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -```{r health_area_total_recent_decision_greater20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\n(avec plus de 20 alertes) - Mambasa"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_recent_decision_less20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\n(avec moins de 20 alertes) - Mambasa"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes_recent %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes_recent %>% - filter(top_aires != "other") - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Mambasa")) + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Mambasa"), -subtitle = "Données des trois dernières semaines") + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mambasa")) + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " \nde validation et aire de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x_recent %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes - Mambasa"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x_recent %>% - filter( - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " \nde validation et aire de santé - Mambasa")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - - -# Final status of alerts {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section investigates information on the proportions of false positive and false negative that became real cases. - - -## Overall final status by status decision comparison - -###Absolute number of alerts by final status and status decision comparison - -```{r final_status} - -outcomes %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_final_outcome + - facet_grid(.~ top_zones , scales = "free_x", space = "free")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -###Percentage of final status by alert status decision - -```{r final_status_percentage} - -perc_final_outcome <- outcomes %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_non_cas = prop_to_perc(non_cas/total), - perc_suspect = prop_to_perc(suspect/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - -perc_final_outcome %>% - ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -##Final status for the past 21 days - -###Absolute number of alerts by final status and status decision comparison for the past 21 days - -```{r final_status_recent} - - -outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -###Percentage of final status by alert status decision for the past 21 days - - -Only non-cases for the past 21 days as of teh database of October 14th 2019. - - -### Table - final status overall - -```{r table_final_status, fig.keep = "all"} - -perc_final_outcome %>% - show_table() - -``` - - - -### Table - final status for the past 21 days - -```{r table_final_status_recent, fig.keep = "all"} - -perc_final_outcome_recent %>% - show_table() - -``` - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_mambasa), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_mambasa <- x - -to_export <- c("cleaned_alerts_database_mambasa", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "perc_final_outcome", - "perc_final_outcome_recent", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_xlsx")) { - dir.create("produced_xlsx") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_mambasa), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` +--- +title: "Investigation of alerts data: Mambasa" +author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, Aminata Ndiaye, and Emma Glennon for the analytic cell OEC Goma" +date: "`r format(Sys.time(), '%A %d %B %Y')`" +output: + html_document: + code_folding: hide + highlight: zenburn + number_sections: yes + theme: spacelab + toc: yes + toc_collapse: no + toc_depth: 1 + toc_float: yes + css: !expr here::here('css', 'style.css') +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(echo = TRUE, + eval = TRUE, + collapse = TRUE, + fig.width = 8, + fig.height = 6, + dpi = 150, + warning = FALSE, + message = FALSE, + fig.path = "figures/") +``` + + +
+ +**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) + +**Code contributors:** Chris Jarvis, Charlie Whittaker + +**Data contributors:** Surveillance team + +**Version:** 1.0.0 + +**Reviewed by:** Thibaut Jombart + +**Notice**: this is a **stable, routine report**. **Do not touch it unless it is +broken.** To make a contribution, carefully read the +[README](../../../../../README.html) file. + + + + + + + + +# Data preparation {.tabset .tabset-fade .tabset-pills} + + + +## Outline + +This report cleans and analyses the alert data of Mambasa. Input comes from an +`xlsx` file containing alerts in a specific format. Because all +sub-coordinations have different standards, each sub-coordination needs a +separate report. + +The data preparation involves the following steps, detailed in the following +tabs: + +* **Load scripts**: loads libraries and useful scripts used in the analyses; all + `.R` files contained in `scripts` at the root of the factory are automatically + loaded + +* **Load data**: imports datasets, and may contain some *ad hoc* changes to the +data such as specific data cleaning (not used in other reports), new variables +used in the analyses, etc. + +* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is + not used in other reports (otherwise cleaning should be done in a dedicated + report); this section is also used to create new variables used in the + analyses + +## Load scripts + +These scripts will load: + +* all local scripts, stored as `.R` filesinside `/scripts/` +* all Overall scripts, i.e. stored outside the factory in `../scripts/` + +```{r read_scripts} + +## read scripts +path_to_scripts <- here::here("scripts") +scripts_files <- dir(path_to_scripts, pattern = ".R$", + full.names = TRUE) +for (file in scripts_files) source(file, local = TRUE) + +ggthemr("fresh") + +``` + + + +## Load alerts data + +We extract the completion date from the file name: + +```{r load_alerts_data} + +## load the data +current_mambasa +x_raw <- custom_import(current_mambasa) +glimpse(x_raw) + +## extract database date from the file name +file_name <- gsub("^[^.]+/", "", current_mambasa) +database_date <- file_name %>% + guess_dates() +database_date + +``` + +The **completion date** of the database is **`r format(database_date, format = +"%A %d %b %Y")`**. + + +## Clean data + +We use *linelist*'s function `clean_data()` to: + +- remove all special characters from the data +- set all characters to lower case +- replace all accentuated and diacritic characters with their closest ascii + match in the latin alphabet +- replace all separators with a single `_` +- replace all mis-spelling using a Overall dictionary (see the file + `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R + session, after compiling the document +- (optionally) detect date formats and convert data to dates, including cases + where format varies within a column + + +This cleaning is achieved with: + +```{r data_cleaning} + +x <- x_raw %>% + clean_data(guess_dates = FALSE, + wordlists = cleaning_rules) %>% + as_tibble() + +``` + + +## Renaming variables and sanity checks + +The following variables will be used, and are therefore checked: + +- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` +- sanitize `genre` +- make `origin` as a sanitized version of `source_notif` + +```{r check_variables} + +## date of notification +x <- x %>% mutate(date = guess_dates(date)) +range(x$date, na.rm = TRUE) + +# REVIEW: Remove NA dates +date_na <- sum(is.na(x$date)) +x <- x %>% filter(!is.na(date)) +# Check that na dates are removed + +## gender +table(x$sexe, useNA = "ifany") + +## contact connu +x <- mutate(x, contact_connu = lien_epi) +table(x$contact_connu, useNA = "ifany") + +## zone de sante +x <- mutate(x, zone_de_sante = as.character(zone_de_sante)) +table(x$zone_de_sante, useNA = "ifany") + +## aire de sante +x <- mutate(x, aire_de_sante = as.character(as)) +table(x$aire_de_sante, useNA = "ifany") + +## origin / source_notif +x <- mutate(x, origin = moyen_de_transmission) +table(x$origin, useNA = "ifany") + +# create variable for alert validation +x <- x %>% mutate( + status = ifelse(result_invest == "inconnu", "statut_inconnu", result_invest)) %>% + mutate(status = factor(status, levels = c("statut_inconnu", + "invalidee", + "validee"))) +table(x$status, useNA = "ifany") + +# another round of cleaning +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + + +There are `r length(date_na)` missing dates in the data. + + + +## Variable creation + +The following variables are created: + +- `bleed` - which describes whether the alert displayed any of the bleeding related +symptoms. +- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert +was reported in. +- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de +Santes) are grouped into "other". +- `top_aires` - which describes the 14 most active Aire de Santes (out of all the +legitimate Aire de Santes belonging to Mambasa present in the database). Note - All +other Aire de Santes are aggregated into the category "other". + +- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. + +Note - All other Aire de Santes are aggregated into the category "other". + +``` {r variable_creation} + +## bleed +bleed <- x %>% + select(contains("saignement")) %>% + apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) +x <- mutate(x, bleed = bleed) +table(x$bleed, useNA = "ifany") + +## epiweek report +x <- x %>% + mutate(epiweek_report = + aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% + mutate(epiweek_report_label = + aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) + + + +x <- x %>% + mutate(top_zones = top_values(zone_de_sante, 5)) %>% + mutate(top_aires = top_values(aire_de_sante, 14)) + + +#create a variable for the final status of the alert +x <- x %>% + mutate(final_outcome = classif_final) %>% + mutate(final_outcome = factor(final_outcome, levels = c("confirme", + "non_cas", + "suspect"))) + +``` + +One last round of dictionary-based cleaning: + +```{r last_cleaning} + +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + +Remove data from before 2019-01-01 as not as reliable. + +```{r remove_pre_2019} + +x <- x %>% + filter(date >= "2019-01-01") + +``` + + +## Outcomes + +Outcome is defined for alerts which have either been validated or invalidated. + +```{r outcomes} + +## get only known outcomes +outcomes <- x %>% + filter(status %in% c("validee", "invalidee")) %>% + droplevels() %>% + mutate(validee = 1 * (status == "validee")) + +``` + + +## Check admin areas are correct + +```{r load_area_names_data} + +## load the data +current_spatial +area_names <- rio::import(current_spatial) %>% + clean_data(guess_dates = FALSE) + +as_names <- unique(area_names$as) +zs_names <- unique(area_names$zs) + +``` + +### Identify unknown areas + +```{r identify_area_names_data} + +## load the data +n_rows <- nrow(x) + +unknown_as <- x %>% + filter(!aire_de_sante %in% as_names) + +most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) + +start_date <- database_date - 20 +unknown_as_3weeks <- x %>% + filter(!aire_de_sante %in% as_names, + date >= start_date) + +``` + + +There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. + +```{r show_unknown_area_names} + +table_unknown_as <- unknown_as %>% + count(zone_de_sante, aire_de_sante) + +table_unknown_as %>% + show_table() + +``` + + +## Filter to known aire de santes + +```{r known_aire_de_sante} + +x <- x %>% + filter(aire_de_sante %in% as_names) + +``` + + + +## Add population data + +Load the population data + +```{r load_pop_data} + +## load the data +current_pop +pop_data <- rio::import(current_pop) +glimpse(pop_data) + +pop_data <- pop_data %>% + clean_data(guess_dates = FALSE) %>% + as_tibble() +``` + +### Check for area not in alerts + +```{r filter_by_alerts} + +## load the data +pop_data <- pop_data %>% + filter(aire_de_sante %in% x$aire_de_sante, + zone_de_sante %in% x$zone_de_sante) + +pop_data + +## aire de sante population +aire_de_sante_pop <- pop_data %>% + mutate(as_population = population) %>% + select(zone_de_sante, aire_de_sante, as_population) + +## zone de sante population +zone_de_sante_pop <- pop_data %>% + group_by(zone_de_sante) %>% + summarise(zs_population = sum(population, na.rm = TRUE)) + +total_population <- sum(unique(pop_data$population), na.rm = TRUE) + +## adding populations to main data +x <- x %>% + left_join(aire_de_sante_pop, + by = c("aire_de_sante", "zone_de_sante")) %>% + left_join(zone_de_sante_pop, + by = c("zone_de_sante")) + +if(!allNA(x$as_population)){ + x <- x %>% + mutate(total_population = total_population) +} + + +``` + + + +## Check population linkage (review regularly) + +```{r check_population_linkage} + +# Calculating number of individuals successfully linked to population data +number_individuals <- nrow(x) +number_individuals_aire_linked <- sum(!is.na(x$as_population)) +prop_linked <- sum(number_individuals_aire_linked / number_individuals) + +# Compare these two quantities - ensure there aren't any mispellings we're missing +table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching + +# Identifying Aire de Santes with missing population data +mambasa_ha_pop <- pop_data$population +index <- which(is.na(mambasa_ha_pop)) +non_pop_data <- pop_data$aire_de_sante[index] + +# Identifying number of alerts linked to Aire de Sante with +# missing population data +alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) + +``` + +There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. + + +## Expected decisions + +Alerts are supposed to be validated using the following key, depending on +whether the person has: + +1. **known contact** with a case and at least **one symptom** +2. if not 1, **unexplained bleeding** +3. if not 2, **fever and 3 other symptoms** + +Note that 3 de facto excludes bleeding as a candidate symptom. We create a new +variable which asserts these conditions: + +```{r expected_result} + +## elements of diagnostic +## criteria 1 +has_contact <- outcomes$contact_connu %in% c("confirme", "oui") +has_one_symptom <- outcomes %>% + select(fievre, + nausee_vom, + diarrhee, + fatigue, + anorexie, + dlr_abdo, + dlr_thorax, + dlr_muscu, + dlr_artic, + cephalee, + toux, + dyspnee, + dysphagie, + odynophagie, + hoquet, + bleed) %>% + apply(1, function(e) any(e == "oui", na.rm = TRUE)) +fits_1 <- has_contact & has_one_symptom + +## criteria 2 +has_bleeding <- outcomes$bleed == "oui" +fits_2 <- !fits_1 & has_bleeding + +## criteria 3 +has_fever <- outcomes$fievre == "oui" +has_3_symptoms <- outcomes %>% + select(nausee_vom, + diarrhee, + fatigue, + anorexie, + dlr_abdo, + dlr_thorax, + dlr_muscu, + dlr_artic, + cephalee, + toux, + dyspnee, + dysphagie, + odynophagie, + hoquet, + bleed) %>% + apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 +fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms + + +## make sure criteria are exclusive - no number in the table below should exceed +## 1. i.e this table should only zero or one as categories. +table(fits_1 + fits_2 + fits_3) + +## expected decisions +outcomes <- outcomes %>% + mutate(admit_contact = fits_1, + admit_bleeding = fits_2, + admit_fever = fits_3) %>% + mutate(expected_decision = ifelse( + fits_1 | fits_2 | fits_3, + "validee", + "invalidee"), + decision_comparison = case_when( + status == "validee" & expected_decision == "validee" ~ + "true_positive", + status == "invalidee" & expected_decision == "invalidee" ~ + "true_negative", + status == "validee" & expected_decision == "invalidee" ~ + "false_positive", + status == "invalidee" & expected_decision == "validee" ~ + "false_negative", + TRUE ~ NA_character_ + ), + decision_comparison = + factor(decision_comparison, + levels = c("true_positive", + "true_negative", + "false_positive", + "false_negative"))) + +## remove alerts with missing comparisons, from 2019 +outcomes <- outcomes %>% + filter(!is.na(decision_comparison), + date >= as.Date("2019-01-01")) + + +## check that classification is well-made +outcomes %>% + group_by(status, expected_decision, decision_comparison) %>% + count() + + +``` + + + +## Last 21 days + +We duplicate the previous datasets, retaining the 21 days leading up to the +current database date. + +```{r subset_21_days} + +start_date <- database_date - 21 +x_recent <- filter(x, date > start_date) +outcomes_recent <- filter(outcomes, date > start_date) + +``` + + +## Alerts counts split + +Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. + +```{r alert_count_split} + +greater_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 100) %>% + pull(aire_de_sante) + +less_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 100) %>% + pull(aire_de_sante) + +less_100[less_100 %in% greater_100] <- NA +less_100 <- less_100[complete.cases(less_100)] + +greater_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 20) %>% + pull(aire_de_sante) +less_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 20) %>% + pull(aire_de_sante) + +less_20_recent[less_20_recent %in% greater_20_recent] <- NA +less_20_recent <- less_20_recent[complete.cases(less_20_recent)] + +``` + + + +## Custom color scales + +We define custom colors for some of the variables used in the plots. + +```{r scales_fill} + +scale_origins <- scale_fill_manual( + "Origine", + values = c(communautaire = "#ffcc00", + recherche_active = "#c3c388", + surveillance_passive = "#ff6699", + point_entree = "#40bf80", + autre = "#668cff", + inconnu = "grey", + check_cleaning_rules = "grey")) + +scale_decisions <- scale_fill_manual( + "Décisions", + values = c(true_positive = "#94b8b8", + true_negative = "#8c8cd9", + false_positive = "#ff8080", + false_negative = "#b3003b"), + labels = c(true_positive = "Validation correcte", + true_negative = "Invalidation correcte", + false_positive = "Fausse alerte", + false_negative = "Alerte manquée")) + +scale_validations <- scale_fill_manual( + "Outcome", + values = c(statut_inconnu = "#BCB4A4", + validee = "#D56F3E", + invalidee = "#F2C69B"), + labels = c(validee = "Validée", + invalidee = "Invalidée", + statut_inconnu = "Statut Inconnu")) + + +scale_final_outcome <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(confirme = "#ff9999", + suspect = "#264d73", + non_cas = "#8cb3d9"), + labels = c(confirme = "Cas confirme", + suspect= "Cas suspect", + non_cas = "Non-cas" + + )) + +scale_final_outcome_perc <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(perc_confirme = "#ff9999", + perc_suspect = "#264d73", + perc_non_cas = "#8cb3d9"), + labels = c(perc_confirme = "Cas confirme", + perc_suspect= "Cas suspect", + perc_non_cas = "Non-cas" + + )) + + +``` + + + + + + +# Validation status {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + +## Overall + +### Weekly, since database start + +```{r sous_coord_time} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation - Mambasa")) + + large_txt + + rotate_x_text(45) + + scale_months + + theme(legend.position = "bottom") + +``` + + +### Table - weekly since database start + +```{r sous_coord_time_table, fig.keep = "all"} + +table_validation_overall_time <- x %>% + count(epiweek_report, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_time %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r sous_coord_recent} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + scale_weeks + + theme(legend.position = "bottom") + +``` + +### Table - daily past 3 weeks + +```{r sous_coord_recent_table, fig.keep = "all"} + +table_validation_overall_past_3_weeks <- x_recent %>% + count(date, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_past_3_weeks %>% + show_table() + +``` + + +## Overall proportion validated + +```{r sous_co_proportion_validated} + +x_prop <- x %>% + count(epiweek_report_label, status) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + p = prop_to_perc(validee / total), + lci = prop_ci(validee, total, "lower", TRUE), + uci = prop_ci(validee, total, "upper", TRUE)) %>% + select(epiweek_report_label, p, lci, uci) + +ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + + geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + + geom_point(aes(y = p), size = 2) + + geom_line(aes(y = p), size = 1) + + scale_color_discrete(guide = FALSE) + + ylim(c(0, 100)) + + labs(x = "", + y = "Pourcentage d'alertes validées \npar semaine", + title = paste0("Proportion d'alertes validées - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total} + +ggplot(x, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et \nzone de santé - Mambasa") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_validation, fig.keep = "all"} + +table_hz_total_validation <- x %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation ", + " et zone de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_validation, fig.keep = "all"} + +table_hz_over_time_validation <- x %>% + count(epiweek_report, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_validation %>% + show_table() + +``` + + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent} + +ggplot(x_recent, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation", + "et \nzone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_validation, fig.keep = "all"} + +table_hz_total_recent_validation <- x_recent %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_validation %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation ", + "et zone de sante - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_validation <- x_recent %>% + count(date, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_validation %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table + +```{r health_area_total_greater100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Mambasa", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_less100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Mambasa", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total since database start + +```{r table_ha_total_validation, fig.keep = "all"} + +table_ha_total_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = epiweek_report_label, fill = status)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_validation, fig.keep = "all"} + +table_ha_over_time_validation <- x %>% + count(epiweek_report, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_validation %>% + show_table() + +``` + + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table + +```{r health_area_total_recent_greater20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + " (avec plus de 20 alertes) - Mambasa"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_recent_less20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + " (avec moins de 20 alertes) - Mambasa"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total past 3 weeks + +```{r table_ha_total_recent_validation, fig.keep = "all"} + +table_ha_total_recent_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_validation %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent, fig.width = 12, fig.height = 8} + +x_validations <- x_recent %>% + filter(top_aires != "other") %>% + count(date, status, top_aires) + +ggplot(x_validations, aes(x = date, y = n, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_validation <- x_recent %>% + count(date, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_validation %>% + show_table() + +``` + + + + + +# Origins {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their origin + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + + +## Overall + +### Weekly, since database start + +```{r sous_coord_origins} + +ggplot(x, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = "Nombre d'alertes par origine - Mambasa") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +```{r sous_coord_time_table_origins, fig.keep = "all"} + +table_origins <- x %>% + count(epiweek_report, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r origins_time_recent} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + rotate_x_text(45) + + large_txt + + theme(legend.position = "bottom", + panel.spacing.y = unit(1, "lines")) + + labs(title = "Nombre d'alertes par origine - Mambasa", + subtitle = "Données des trois dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_origins_recent, fig.keep = "all"} + +table_origins_recent <- x_recent %>% + count(date, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins_recent %>% + show_table() + +``` + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_origin} + +ggplot(x, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et zone de santé - Mambasa") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_origins, fig.keep = "all"} + +table_hz_total_origins <- x %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_origin, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + "et zone de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_origins, fig.keep = "all"} + +table_hz_over_time_origins <- x %>% + count(epiweek_report, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_origins %>% + show_table() + +``` + + +## Health Zone - Past 3 Weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_origin_recent} + +ggplot(x_recent, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et", " \nzone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_origins, fig.keep = "all"} + +table_hz_total_recent_origins <- x_recent %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_origins %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + "et zone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_origins <- x_recent %>% + count(date, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_origins %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_origins_greter100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Mambasa", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_origins_less100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Mambasa", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_origins, fig.keep = "all"} + +table_ha_total_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_origin, fig.width = 12, fig.height = 8} + +x_origins <- x %>% + filter(top_aires != "other") + +ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + " et zone de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_origins, fig.keep = "all"} + +table_ha_over_time_origins <- x %>% + count(epiweek_report, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_origins %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +``` {r health_area_total_recent_origin_greater20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "(avec plus de 20 alertes) - Mambasa"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_recent_origin_less20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "(avec moins de 20 alertes) - Mambasa"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - Total past 3 weeks + +```{r table_ha_total_recent_origins, fig.keep = "all"} + +table_ha_total_recent_origins <- x_recent %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_origins %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} + +x_origins_recent <- x_recent %>% + filter(top_aires != "other") %>% + count(date, origin, top_aires) + +ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + " et aire de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_origins <- x_recent %>% + count(date, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_origins %>% + show_table() + +``` + + + + + +# Validation decisions {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section focuses on the decision of alerts, by health zones. + +When looking at decisions on the treatment of alerts, we identify the 4 +following situations: + +* **true positive**: alerts were rightfully + validated (patient tested) +* **true negative**: alerts were rightfully not + validated (patient not tested) +* **false positive**: alerts were wrongly + validated (patient tested, shoud not have been) +* **false negative**: alerts were wrongly not + validated (patient not tested, should have been) + +**False positive** create a waste of resources as well as un-necessary pressure +on the patient and community. **False negative** create a risk of missing cases. + +For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + +## Overall + +### Weekly, since database start + +```{r decisions} + +ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar(color = "white") + + scale_x_discrete(drop = FALSE) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation - Mambasa") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + scale_decisions + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +``` {r table_decisions} + +table_decisions <- outcomes %>% + count(epiweek_report, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(c("row", "col")) %>% + mutate(prop_false_positive_95ci = + prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), + prop_false_negative_95ci = + prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), + prop_false_positive = prop_to_perc(false_positive / Total), + lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), + upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), + prop_false_negative = prop_to_perc(false_negative / Total), + lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), + upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) + +table_decisions %>% + select(-prop_false_positive, + -lower_false_positive, + -upper_false_positive, + -prop_false_negative , + -lower_false_negative, + -upper_false_negative + ) %>% + show_table() + +``` + + +### Table - incorrectly validated/not validated + +* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. +* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. + +```{r table_sens_spec} + +table_sens_spec <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + mutate(total_tested = true_positive + false_positive, + total_not_tested = true_negative + false_negative) %>% + mutate(incorrectly_validated_95ci = + prop_to_display_ci(false_positive, total_tested, + dec = 2, perc = TRUE), + incorrectly_not_validated_95ci = + prop_to_display_ci(false_negative, total_not_tested, + dec = 2, perc = TRUE)) +table_sens_spec %>% + show_table() + +``` + +`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. + +**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. + + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r decisions_time} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + rotate_x_text(45) + + large_txt + + scale_decisions + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + labs(title = paste("Nombre d'alertes par décision de validation - Mambasa"), + subtitle = "Données des trois dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Over time table + +``` {r table_decisions_recent} + +table_decisions_recent <- incidence(outcomes_recent$date, "day", + groups = outcomes_recent$decision_comparison) %>% + as.data.frame() %>% + adorn_totals(where = c("row", "col")) + +table_decisions_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_decision} + +ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision et zone de santé - Mambasa") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + rotate_x_text(45) + +``` + +### Table - Total since database start + +```{r table_hz_total_decisions, fig.keep = "all"} + +table_hz_total_decisions <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_decision, fig.width = 12, fig.height = 8} + +ggplot(outcomes, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_decisions, fig.keep = "all"} + +table_hz_over_time_decisions <- outcomes %>% + count(epiweek_report, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_decisions %>% + show_table() + +``` + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_decision_recent} + +ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision et zone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_decisions, fig.keep = "all"} + +table_hz_total_recent_decisions <- outcomes_recent %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_decisions %>% + show_table() + +``` + +### Daily past 3 weeks + +```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_decisions %>% + show_table() + +``` + + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_decisionsgreater100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Mambasa", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_decisionsless100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Mambasa", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` +### Table - total since database start + + +```{r table_ha_total_decision, fig.keep = "all"} + +table_ha_total_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_decision, fig.width = 12, fig.height = 8} + +x_decisions <- outcomes %>% + filter(top_aires != "other") + +ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de sante - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_decisions, fig.keep = "all"} + +table_ha_over_time_decisions <- outcomes %>% + count(epiweek_report, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_decisions %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +```{r health_area_total_recent_decision_greater20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé", + "\n(avec plus de 20 alertes) - Mambasa"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_recent_decision_less20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé", + "\n(avec moins de 20 alertes) - Mambasa"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_decisions, fig.keep = "all"} + +table_ha_total_recent_decisions <- outcomes_recent %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_decisions %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} + +x_decisions_recent <- outcomes_recent %>% + filter(top_aires != "other") + +ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_decisions} + +table_ha_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_decisions %>% + show_table() + +``` + + + + + + +# Alert Rates {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. + +**Note:** Rate are only calculated for areas with population data. There +are `r length(non_pop_data)` Aire de Santes without population data, which totals + **`r alerts_no_pop`** alerts. + +**Note: All known Aire de Santes are reported in tables** + + + +```{r reactivate_alert_rates, include = FALSE} + +if(allNA(x$as_population)){ + knitr::opts_chunk$set(eval = FALSE) + no_pop_data <- TRUE + +} else{ + cat("There is no population data therefore alert rates can not be calculated.") +} + +``` + + + +## Overall + +### Weekly, Since Database Start + +```{r alert_rates} + +x_alert_rate_sous_co <- x %>% + count(epiweek_report_label, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, + aes(x = epiweek_report_label, y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Mambasa")) + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + +### Table - Weekly Since Database Start + +```{r table_alert_rates_per_week, fig.keep = "all"} + +table_alert_rates_per_week <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = statut_inconnu + invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_per_week %>% + show_table() + +``` + + + +## Overall Past 3 Weeks + +### Daily, Past 3 Weeks + +``` {r alert_rates_recent} + +x_alert_rate_sous_co <- x_recent %>% + count(date, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Mambasa"), +subtitle = "Données des trois dernières semaines") + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - Weekly Since Database Start + +```{r table_alert_rates_recent, fig.keep = "all"} + +table_alert_rates_recent <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = statut_inconnu + invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total, Since Database Start + +``` {r alert_rate_zone_de_sante_total_validation_status} + +date_span <- database_date - min(x$date, na.rm = TRUE) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(status, zone_de_sante, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mambasa")) + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(epiweek_report_label, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + + +## Health Zone Past 3 Weeks + +### Total, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + as_population > 0, + top_zones != "other") %>% + count(status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * 3)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + +``` + +### Daily, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + zs_population > 0, + top_zones != "other") %>% + count(date, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + + + +## Health Area + +### Total, Since Database Start + +``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} + +date_span <- database_date - min(x$date) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, top_zones, aire_de_sante, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut ", + "de validation et aire de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(top_aires != "other", + as_population > 0) %>% + count(epiweek_report_label, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / as_population) + +ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " \nde validation et aire de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + +## Health Area - Past 3 Weeks + +``` {r alert_rate_as_3_weeks, fig.width = 14} + +x_alert_rate_aire_sante_recent <- x_recent %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, aire_de_sante, top_zones, as_population) %>% + mutate(alert_rate = (10000 * n) / (as_population * 3)) + +ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par 10000 \npersonnes et par semaine", + title = paste0("Nombre d'alertes par 10000 personnes - Mambasa"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + +``` + + +### Daily, Past 3 Weeks + +``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x_recent %>% + filter( + top_aires != "other", + as_population > 0) %>% + count(date, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population)) + +ggplot(data = x_alert_rate_aire_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " \nde validation et aire de santé - Mambasa")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + + + + + +# Final status of alerts {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section investigates information on the proportions of false positive and false negative that became real cases. + + +## Overall final status by status decision comparison + +###Absolute number of alerts by final status and status decision comparison + +```{r final_status} + +outcomes %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_final_outcome + + facet_grid(.~ top_zones , scales = "free_x", space = "free")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision + +```{r final_status_percentage} + +perc_final_outcome <- outcomes %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_non_cas = prop_to_perc(non_cas/total), + perc_suspect = prop_to_perc(suspect/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + +perc_final_outcome %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +##Final status for the past 21 days + +###Absolute number of alerts by final status and status decision comparison for the past 21 days + +```{r final_status_recent} + + +outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +###Percentage of final status by alert status decision for the past 21 days + + +Only non-cases for the past 21 days as of the database of October 14th 2019. + +```{r final_status_percentage_recent} +cas <- unique(outcomes_recent$final_outcome) + +if(length(which(cas%in%c("confirme","suspect")))==0){ + print("non-cases only") +}else{ +perc_final_outcome_recent <- outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_suspect = prop_to_perc(suspect/total), + perc_non_cas = prop_to_perc(non_cas/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + + +perc_final_outcome_recent %>% + ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) +} +``` + + +### Table - final status overall +Only non-cases for the past 21 days as of the database of October 14th 2019. + +```{r table_final_status, fig.keep = "all"} +if(length(which(cas%in%c("confirme","suspect")))==0){ + print("non-cases only") +}else{ +perc_final_outcome %>% + show_table() +} +``` + + + +### Table - final status for the past 21 days + +```{r table_final_status_recent, fig.keep = "all"} +if(length(which(cas%in%c("confirme","suspect")))==0){ + print("non-cases only") +}else{ +perc_final_outcome_recent %>% + show_table() +} +``` + + + + +# Export data and tables {.tabset .tabset-fade .tabset-pills} + + +```{r reactivate_recent, include = FALSE} +knitr::opts_chunk$set(eval = TRUE) +``` + + +## Outline + +We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current +working directory. + + + + +## Export clean data + +We export some of the clean database, placed in `produced_rds/` as well as in +`data/clean/`: + +```{r export_rds, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +## create the text for the file name with the database date +rds_file_name <- sprintf("%sclean_%s.rds", + undated_file_name(current_mambasa), + format(database_date, "%Y-%m-%d")) +rds_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_rds", rds_file_name)) + +``` + +We copy these files to the `data/clean` folder: + +```{r copy_rds, eval = TRUE} +# copy some files into `data/clean/` + +if (!dir.exists("data/clean")) { + dir.create("data/clean") +} + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + rds_file_name) +# Copy the rds data +file.copy(from = file.path("produced_rds", rds_file_name), + to = destination, + overwrite = TRUE) + +``` + + + + +## Excel files + +The following code exports all tables named in `to_report` to `xslx` files, +stored inside the folder `produced_xlsx`: + +### Cleaned alerts database + +```{r exports_tables, eval = TRUE} + +cleaned_alerts_database_mambasa <- x + +to_export <- c("cleaned_alerts_database_mambasa", + "table_validation_overall_time", + "table_validation_overall_past_3_weeks", + "table_hz_total_validation", + "table_hz_over_time_validation", + "table_hz_total_recent_validation", + "table_hz_over_time_recent_validation", + "table_ha_total_validation", + "table_ha_over_time_validation", + "table_ha_total_recent_validation", + "table_ha_over_time_recent_validation", + "table_origins", + "table_origins_recent", + "table_hz_total_origins", + "table_hz_over_time_origins", + "table_hz_total_recent_origins", + "table_hz_over_time_recent_origins", + "table_ha_total_origins", + "table_ha_over_time_origins", + "table_ha_total_recent_origins", + "table_ha_over_time_recent_origins", + "table_decisions", + "table_sens_spec", + "table_decisions_recent", + "table_hz_total_decisions", + "table_hz_over_time_decisions", + "table_hz_total_recent_decisions", + "table_hz_over_time_recent_decisions", + "table_ha_total_decisions", + "table_ha_over_time_decisions", + "table_ha_total_recent_decisions", + "table_ha_over_time_recent_decisions", + "table_unknown_as") + +if(length(which(cas%in%c("confirme")))==0){ + print("non-cases only") +}else{ + to_export <- c(to_export, + "perc_final_outcome", + "perc_final_outcome_recent") +} +``` + +```{r xlsx_exports, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_xlsx")) { + dir.create("produced_xlsx") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_xlsx", + paste0(e, ".xlsx"))) +} + +``` + + +We copy the main data file to the `data/clean` folder: + +```{r export_xlsx, eval = TRUE} + +## create the text for the file name with the database date +xlsx_file_name <- sprintf("%sclean_%s.xlsx", + undated_file_name(current_mambasa), + format(database_date, "%Y-%m-%d")) +xlsx_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_xlsx", xlsx_file_name)) + +``` + + +```{r copy_xlsx, eval = TRUE} +# copy some files into `data/clean/` + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + xlsx_file_name) +# Copy the rds data +file.copy(from = file.path("produced_xlsx", xlsx_file_name), + to = destination, + overwrite = TRUE) + +``` + + + +Click on the following links to open the files (only works if the files above +have been generated and are in the same folder as this document): + + +```{r xlsx_links, results = "asis", eval = TRUE} + +for (e in to_export) { + txt <- sprintf("- [%s.xlsx](%s.xlsx)", + e, + file.path("produced_xlsx", + e)) + cat(txt, sep = "\n") +} + +``` + + + +## R objects + +The following code exports all tables named in `to_report` to `rds` files, +stored inside the folder `produced_rds`: + +```{r rds_exports, eval = TRUE} + +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_rds", + paste0(e, ".rds"))) +} + +``` + + + + + +# System information {.tabset .tabset-fade .tabset-pills} + + +## Outline + +The following information documents the system on which the document was +compiled. + + + +## System + +This provides information on the operating system. + +```{r system_info} +Sys.info() +``` + + +## R environment + +This provides information on the version of R used: + +```{r R_session} +R.version +``` + + + +## R packages + +This provides information on the packages used: + +```{r R_pkg} +sessionInfo() +``` + + +## Compilation parameters + +This shows which parameters were passed through `params` at compilation time: + +```{r params} +params +``` diff --git a/alerts/report_sources/alerts_mangina_2019-12-10.Rmd b/alerts/report_sources/alerts_mangina_2019-12-31.Rmd similarity index 96% rename from alerts/report_sources/alerts_mangina_2019-12-10.Rmd rename to alerts/report_sources/alerts_mangina_2019-12-31.Rmd index 7c63266..eab9fe4 100644 --- a/alerts/report_sources/alerts_mangina_2019-12-10.Rmd +++ b/alerts/report_sources/alerts_mangina_2019-12-31.Rmd @@ -1,2850 +1,2864 @@ ---- -title: "Investigation of alerts data: Mangina" -author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating Amy Gimma and Aminata Ndiaye for the analytic cell OEC Goma" -date: "`r format(Sys.time(), '%A %d %B %Y')`" -output: - html_document: - code_folding: hide - highlight: zenburn - number_sections: yes - theme: spacelab - toc: yes - toc_collapse: no - toc_depth: 1 - toc_float: yes - css: !expr here::here('css', 'style.css') ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - collapse = TRUE, - fig.width = 8, - fig.height = 6, - dpi = 150, - warning = FALSE, - message = FALSE, - fig.path = "figures/") -``` - - -
- -**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) - -**Code contributors:** Chris Jarvis, Charlie Whittaker - -**Data contributors:** Surveillance team - -**Version:** 1.0.0 - -**Reviewed by:** Thibaut Jombart - -**Notice**: this is a **stable, routine report**. **Do not touch it unless it is -broken.** To make a contribution, carefully read the -[README](../../../../../README.html) file. - - - - - - - - -# Data preparation {.tabset .tabset-fade .tabset-pills} - - - -## Outline - -This report cleans and analyses the alert data of Mangina. Input comes from an -`xlsx` file containing alerts in a specific format. Because all -sub-coordinations have different standards, each sub-coordination needs a -separate report. - -The data preparation involves the following steps, detailed in the following -tabs: - -* **Load scripts**: loads libraries and useful scripts used in the analyses; all - `.R` files contained in `scripts` at the root of the factory are automatically - loaded - -* **Load data**: imports datasets, and may contain some *ad hoc* changes to the -data such as specific data cleaning (not used in other reports), new variables -used in the analyses, etc. - -* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is - not used in other reports (otherwise cleaning should be done in a dedicated - report); this section is also used to create new variables used in the - analyses - -## Load scripts - -These scripts will load: - -* all local scripts, stored as `.R` filesinside `/scripts/` -* all Overall scripts, i.e. stored outside the factory in `../scripts/` - -```{r read_scripts} - -## read scripts -path_to_scripts <- here::here("scripts") -scripts_files <- dir(path_to_scripts, pattern = ".R$", - full.names = TRUE) -for (file in scripts_files) source(file, local = TRUE) - -ggthemr("fresh") - -``` - - - -## Load alerts data - -We extract the completion date from the file name: - -```{r load_alerts_data} - -## load the data -current_mangina -x_raw <- custom_import(current_mangina) -glimpse(x_raw) - -## extract database date from the file name -file_name <- gsub("^[^.]+/", "", current_mangina) -database_date <- file_name %>% - guess_dates() -database_date - -``` - -The **completion date** of the database is **`r format(database_date, format = -"%A %d %b %Y")`**. - - -## Clean data - -We use *linelist*'s function `clean_data()` to: - -- remove all special characters from the data -- set all characters to lower case -- replace all accentuated and diacritic characters with their closest ascii - match in the latin alphabet -- replace all separators with a single `_` -- replace all mis-spelling using a Overall dictionary (see the file - `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R - session, after compiling the document -- (optionally) detect date formats and convert data to dates, including cases - where format varies within a column - - -This cleaning is achieved with: - -```{r data_cleaning} - -x <- x_raw %>% - clean_data(guess_dates = FALSE, - wordlists = cleaning_rules) %>% - as_tibble() - -``` - - -## Renaming variables and sanity checks - -The following variables will be used, and are therefore checked: - -- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` -- sanitize `genre` -- make `origin` as a sanitized version of `source_notif` - -```{r check_variables} - -## date of notification -x <- x %>% mutate(date = guess_dates(date)) -range(x$date, na.rm = TRUE) - -# REVIEW: Remove NA dates -date_na <- sum(is.na(x$date)) -x <- x %>% filter(!is.na(date)) -# Check that na dates are removed - -## gender -table(x$sexe, useNA = "ifany") - -## contact connu -x <- mutate(x, contact_connu = lien) -table(x$contact_connu, useNA = "ifany") - -## zone de sante -x <- mutate(x, zone_de_sante = as.character(zs)) -table(x$zone_de_sante, useNA = "ifany") - -## aire de sante -x <- mutate(x, aire_de_sante = as.character(as)) -table(x$aire_de_sante, useNA = "ifany") - -## origin / source_notif -x <- mutate(x, origin = source_notif) -table(x$origin, useNA = "ifany") - -# create variable for alert validation -x <- x %>% mutate( - status = ifelse(conc_final %in% c("inconnu", "en_cours"), - "statut_inconnu", conc_final)) %>% - mutate(status = factor(status, levels = c("statut_inconnu", - "invalidee", - "validee"))) - -x$status[is.na(x$status)] <- "statut_inconnu" -table(x$status, useNA = "ifany") - -# another round of cleaning -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - - -There are `r length(date_na)` missing dates in the data. - - - -## Variable creation - -The following variables are created: - -- `bleed` - which describes whether the alert displayed any of the bleeding related -symptoms. -- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert -was reported in. -- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de -Santes) are grouped into "other". -- `top_aires` - which describes the 14 most active Aire de Santes (out of all the -legitimate Aire de Santes belonging to Mangina present in the database). Note - All -other Aire de Santes are aggregated into the category "other". - -- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. - - -Note - All other Aire de Santes are aggregated into the category "other". - -``` {r variable_creation} - -## bleed -bleed <- x %>% - select(contains("saignement")) %>% - apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) -x <- mutate(x, bleed = bleed) -table(x$bleed, useNA = "ifany") - -## epiweek report -x <- x %>% - mutate(epiweek_report = - aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% - mutate(epiweek_report_label = - aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) - - - -x <- x %>% - mutate(top_zones = top_values(zone_de_sante, 5)) %>% - mutate(top_aires = top_values(aire_de_sante, 14)) - - -#create a variable for the final status of the alert -x <- x %>% - mutate(final_outcome = class_final) %>% - mutate(final_outcome = factor(final_outcome, levels = c("confirme", - "non_cas", - "suspect"))) -``` - -One last round of dictionary-based cleaning: - -```{r last_cleaning} - -x <- x %>% - clean_variable_spelling(wordlists = cleaning_rules) - -``` - -Remove data from before 2019-01-01 as not as reliable. - -```{r remove_pre_2019} - -x <- x %>% - filter(date >= "2019-01-01") - -``` - - -## Outcomes - -Outcome is defined for alerts which have either been validated or invalidated. - -```{r outcomes} - -## get only known outcomes -outcomes <- x %>% - filter(status %in% c("validee", "invalidee")) %>% - droplevels() %>% - mutate(validee = 1 * (status == "validee")) - -``` - - -## Check admin areas are correct - -```{r load_area_names_data} - -## load the data -current_spatial -area_names <- rio::import(current_spatial) %>% - clean_data(guess_dates = FALSE) - -as_names <- unique(area_names$as) -zs_names <- unique(area_names$zs) - -``` - -### Identify unknown areas - -```{r identify_area_names_data} - -## load the data -n_rows <- nrow(x) - -unknown_as <- x %>% - filter(!aire_de_sante %in% as_names) - -most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) - -start_date <- database_date - 20 -unknown_as_3weeks <- x %>% - filter(!aire_de_sante %in% as_names, - date >= start_date) - -``` - - -There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. - -```{r show_unknown_area_names} - -table_unknown_as <- unknown_as %>% - count(zone_de_sante, aire_de_sante) - -table_unknown_as %>% - show_table() - -``` - - -## Filter to known aire de santes - -```{r known_aire_de_sante} - -x <- x %>% - filter(aire_de_sante %in% as_names) - -``` - - - -## Add population data - -Load the population data - -```{r load_pop_data} - -## load the data -current_pop -pop_data <- rio::import(current_pop) -glimpse(pop_data) - -pop_data <- pop_data %>% - clean_data(guess_dates = FALSE) %>% - as_tibble() -``` - -### Check for area not in alerts - -```{r filter_by_alerts} - -## load the data -pop_data <- pop_data %>% - filter(aire_de_sante %in% x$aire_de_sante, - zone_de_sante %in% x$zone_de_sante) - -pop_data - -## aire de sante population -aire_de_sante_pop <- pop_data %>% - mutate(as_population = population) %>% - select(zone_de_sante, aire_de_sante, as_population) - -## zone de sante population -zone_de_sante_pop <- pop_data %>% - group_by(zone_de_sante) %>% - summarise(zs_population = sum(population, na.rm = TRUE)) - -total_population <- sum(unique(pop_data$population), na.rm = TRUE) - -## adding populations to main data -x <- x %>% - left_join(aire_de_sante_pop, - by = c("aire_de_sante", "zone_de_sante")) %>% - left_join(zone_de_sante_pop, - by = c("zone_de_sante")) - -if(!allNA(x$as_population)){ - x <- x %>% - mutate(total_population = total_population) -} - - -``` - - - -## Check population linkage (review regularly) - -```{r check_population_linkage} - -# Calculating number of individuals successfully linked to population data -number_individuals <- nrow(x) -number_individuals_aire_linked <- sum(!is.na(x$as_population)) -prop_linked <- sum(number_individuals_aire_linked / number_individuals) - -# Compare these two quantities - ensure there aren't any mispellings we're missing -table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching - -# Identifying Aire de Santes with missing population data -mangina_ha_pop <- pop_data$population -index <- which(is.na(mangina_ha_pop)) -non_pop_data <- pop_data$aire_de_sante[index] - -# Identifying number of alerts linked to Aire de Sante with -# missing population data -alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) - -``` - -There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. - - -## Expected decisions - -Alerts are supposed to be validated using the following key, depending on -whether the person has: - -1. **known contact** with a case and at least **one symptom** -2. if not 1, **unexplained bleeding** -3. if not 2, **fever and 3 other symptoms** - -Note that 3 de facto excludes bleeding as a candidate symptom. We create a new -variable which asserts these conditions: - -```{r expected_result} - -## elements of diagnostic -## criteria 1 -has_contact <- outcomes$contact_connu %in% c("confirme", "oui") -has_one_symptom <- outcomes %>% - select(fievre, - naus, - diarr, - asth, - anor, - abdo, - thor, - musc, - arti, - ceph, - toux, - resp, - aval, - gor, - hoq, - bleed) %>% - apply(1, function(e) any(e == "oui", na.rm = TRUE)) -fits_1 <- has_contact & has_one_symptom - -## criteria 2 -has_bleeding <- outcomes$bleed == "oui" -fits_2 <- !fits_1 & has_bleeding - -## criteria 3 -has_fever <- outcomes$fievre == "oui" -has_3_symptoms <- outcomes %>% - select(naus, - diarr, - asth, - anor, - abdo, - thor, - musc, - arti, - ceph, - toux, - resp, - aval, - gor, - hoq, - bleed) %>% - apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 -fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms - - -## make sure criteria are exclusive - no number in the table below should exceed -## 1. i.e this table should only zero or one as categories. -table(fits_1 + fits_2 + fits_3) - -## expected decisions -outcomes <- outcomes %>% - mutate(admit_contact = fits_1, - admit_bleeding = fits_2, - admit_fever = fits_3) %>% - mutate(expected_decision = ifelse( - fits_1 | fits_2 | fits_3, - "validee", - "invalidee"), - decision_comparison = case_when( - status == "validee" & expected_decision == "validee" ~ - "true_positive", - status == "invalidee" & expected_decision == "invalidee" ~ - "true_negative", - status == "validee" & expected_decision == "invalidee" ~ - "false_positive", - status == "invalidee" & expected_decision == "validee" ~ - "false_negative", - TRUE ~ NA_character_ - ), - decision_comparison = - factor(decision_comparison, - levels = c("true_positive", - "true_negative", - "false_positive", - "false_negative"))) - -## remove alerts with missing comparisons, from 2019 -outcomes <- outcomes %>% - filter(!is.na(decision_comparison), - date >= as.Date("2019-01-01")) - - -## check that classification is well-made -outcomes %>% - group_by(status, expected_decision, decision_comparison) %>% - count() - - -``` - - - -## Last 21 days - -We duplicate the previous datasets, retaining the 21 days leading up to the -current database date. - -```{r subset_21_days} - -start_date <- database_date - 21 -x_recent <- filter(x, date > start_date) -outcomes_recent <- filter(outcomes, date > start_date) - -``` - - -## Alerts counts split - -Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. - -```{r alert_count_split} - -greater_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 100) %>% - pull(aire_de_sante) - -less_100 <- x %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 100) %>% - pull(aire_de_sante) - -less_100[less_100 %in% greater_100] <- NA -less_100 <- less_100[complete.cases(less_100)] - -greater_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n >= 20) %>% - pull(aire_de_sante) -less_20_recent <- x_recent %>% count(zone_de_sante, - aire_de_sante) %>% - filter(n < 20) %>% - pull(aire_de_sante) - -less_20_recent[less_20_recent %in% greater_20_recent] <- NA -less_20_recent <- less_20_recent[complete.cases(less_20_recent)] - -``` - - - - -## Custom color scales - -We define custom colors for some of the variables used in the plots. - -```{r scales_fill} - -scale_origins <- scale_fill_manual( - "Origine", - values = c(communautaire = "#ffcc00", - recherche_active = "#c3c388", - surveillance_passive = "#ff6699", - point_entree = "#40bf80", - autre = "#668cff", - inconnu = "grey", - check_cleaning_rules = "grey")) - -scale_decisions <- scale_fill_manual( - "Décisions", - values = c(true_positive = "#94b8b8", - true_negative = "#8c8cd9", - false_positive = "#ff8080", - false_negative = "#b3003b"), - labels = c(true_positive = "Validation correcte", - true_negative = "Invalidation correcte", - false_positive = "Fausse alerte", - false_negative = "Alerte manquée")) - -scale_validations <- scale_fill_manual( - "Outcome", - values = c(statut_inconnu = "#BCB4A4", - validee = "#D56F3E", - invalidee = "#F2C69B"), - labels = c(validee = "Validée", - invalidee = "Invalidée", - statut_inconnu = "Statut Inconnu")) - - -scale_final_outcome <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(confirme = "#ff9999", - suspect = "#264d73", - non_cas = "#8cb3d9"), - labels = c(confirme = "Cas confirme", - suspect= "Cas suspect", - non_cas = "Non-cas" - - )) - -scale_final_outcome_perc <- scale_fill_manual( - name = "Statut final de l'alerte", - values = c(perc_confirme = "#ff9999", - perc_suspect = "#264d73", - perc_non_cas = "#8cb3d9"), - labels = c(perc_confirme = "Cas confirme", - perc_suspect= "Cas suspect", - perc_non_cas = "Non-cas" - - )) - - -``` - - - - - - -# Validation status {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - -## Overall - -### Weekly, since database start - -```{r sous_coord_time} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation - Mangina")) + - large_txt + - rotate_x_text(45) + - scale_months + - theme(legend.position = "bottom") - -``` - - -### Table - weekly since database start - -```{r sous_coord_time_table, fig.keep = "all"} - -table_validation_overall_time <- x %>% - count(epiweek_report, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_time %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r sous_coord_recent} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - scale_weeks + - theme(legend.position = "bottom") - -``` - -### Table - daily past 3 weeks - -```{r sous_coord_recent_table, fig.keep = "all"} - -table_validation_overall_past_3_weeks <- x_recent %>% - count(date, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_validation_overall_past_3_weeks %>% - show_table() - -``` - - -## Overall proportion validated - -```{r sous_co_proportion_validated} - -x_prop <- x %>% - count(epiweek_report_label, status) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - p = prop_to_perc(validee / total), - lci = prop_ci(validee, total, "lower", TRUE), - uci = prop_ci(validee, total, "upper", TRUE)) %>% - select(epiweek_report_label, p, lci, uci) - -ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + - geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + - geom_point(aes(y = p), size = 2) + - geom_line(aes(y = p), size = 1) + - scale_color_discrete(guide = FALSE) + - ylim(c(0, 100)) + - labs(x = "", - y = "Pourcentage d'alertes validées \npar semaine", - title = paste0("Proportion d'alertes validées - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total} - -ggplot(x, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et \nzone de santé - Mangina") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_validation, fig.keep = "all"} - -table_hz_total_validation <- x %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = status)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation ", - " et zone de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_validation, fig.keep = "all"} - -table_hz_over_time_validation <- x %>% - count(epiweek_report, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_validation %>% - show_table() - -``` - - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent} - -ggplot(x_recent, aes(x = top_zones, fill = status)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation", - "et \nzone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_validation, fig.keep = "all"} - -table_hz_total_recent_validation <- x_recent %>% - count(top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_validation %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = status)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation ", - "et zone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_validation <- x_recent %>% - count(date, top_zones, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_validation %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table - -```{r health_area_total_greater100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Mangina", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_less100, fig.width = 14} - -x_validations <- x %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par statut de validation et aire de santé - Mangina", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total since database start - -```{r table_ha_total_validation, fig.keep = "all"} - -table_ha_total_validation <- x %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_validation %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = epiweek_report_label, fill = status)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_validation, fig.keep = "all"} - -table_ha_over_time_validation <- x %>% - count(epiweek_report, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_validation %>% - show_table() - -``` - - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table - -```{r health_area_total_recent_greater20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé ", - "\n(avec plus de 20 alertes) - Mangina"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_recent_less20, fig.width = 14} - -x_validations <- x_recent %>% - count(status, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - - -ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_validations + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par statut de validation et aire de santé", - "\n(avec moins de 20 alertes) - Mangina"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - total past 3 weeks - -```{r table_ha_total_recent_validation, fig.keep = "all"} - -table_ha_total_recent_validation <- x_recent %>% - count(aire_de_sante, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_validation %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent, fig.width = 12, fig.height = 8} - -x_validations <- x_recent %>% - filter(top_aires != "other") %>% - count(date, status, top_aires) - -ggplot(x_validations, aes(x = date, y = n, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par statut de validation", - " et aire de sante - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_validation <- x_recent %>% - count(date, top_aires, status) %>% - spread(status, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_validation %>% - show_table() - -``` - - - - - -# Origins {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section summarises alerts over time according to their origin - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - - -## Overall - -### Weekly, since database start - -```{r sous_coord_origins} - -ggplot(x, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = "Nombre d'alertes par origine - Mangina") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -```{r sous_coord_time_table_origins, fig.keep = "all"} - -table_origins <- x %>% - count(epiweek_report, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins %>% - show_table() - -``` - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r origins_time_recent} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - rotate_x_text(45) + - large_txt + - theme(legend.position = "bottom", - panel.spacing.y = unit(1, "lines")) + - labs(title = "Nombre d'alertes par origine - Mangina", - subtitle= "Données des trois dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_origins_recent, fig.keep = "all"} - -table_origins_recent <- x_recent %>% - count(date, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_origins_recent %>% - show_table() - -``` - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_origin} - -ggplot(x, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et zone de santé - Mangina") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - -### Table - total since database start - -```{r table_hz_total_origins, fig.keep = "all"} - -table_hz_total_origins <- x %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_origin, fig.width = 12, fig.height = 8} - -ggplot(x, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - "et zone de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_origins, fig.keep = "all"} - -table_hz_over_time_origins <- x %>% - count(epiweek_report, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_origins %>% - show_table() - -``` - - -## Health Zone - Past 3 Weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_origin_recent} - -ggplot(x_recent, aes(x = top_zones, fill = origin)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_origins + - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et zone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_origins, fig.keep = "all"} - -table_hz_total_recent_origins <- x_recent %>% - count(top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_origins %>% - show_table() - -``` - - -### Daily past 3 weeks - -```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} - -ggplot(x_recent, aes(x = date, fill = origin)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - "et zone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_origins <- x_recent %>% - count(date, top_zones, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_origins %>% - show_table() - -``` - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_origins_greter100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé -Mangina", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -```{r health_area_total_origins_less100, fig.width = 14} - -x_origins <- x %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par origine et aire de santé - Mangina", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total since database start - - -```{r table_ha_total_origins, fig.keep = "all"} - -table_ha_total_origins <- x %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_origins %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_origin, fig.width = 12, fig.height = 8} - -x_origins <- x %>% - filter(top_aires != "other") - -ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par origine", - " et zone de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_origins, fig.keep = "all"} - -table_ha_over_time_origins <- x %>% - count(epiweek_report, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_origins %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -``` {r health_area_total_recent_origin_greater20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "\n(avec plus de 20 alertes) - Mangina"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -``` {r health_area_total_recent_origin_less20, fig.width = 14} - -x_origins <- x_recent %>% - count(origin, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_origins + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par origine et aire de santé", - "\n(avec moins de 20 alertes) - Mangina"), - subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -### Table - Total past 3 weeks - -```{r table_ha_total_recent_origins, fig.keep = "all"} - -table_ha_total_recent_origins <- x_recent %>% - count(aire_de_sante, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_origins %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} - -x_origins_recent <- x_recent %>% - filter(top_aires != "other") %>% - count(date, origin, top_aires) - -ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_origins + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par origine", - " et aire de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} - -table_ha_over_time_recent_origins <- x_recent %>% - count(date, top_aires, origin) %>% - spread(origin, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_origins %>% - show_table() - -``` - - - - - -# Validation decisions {.tabset .tabset-fade .tabset-pills} - - -## Outline - -This section focuses on the decision of alerts, by health zones. - -When looking at decisions on the treatment of alerts, we identify the 4 -following situations: - -* **true positive**: alerts were rightfully - validated (patient tested) -* **true negative**: alerts were rightfully not - validated (patient not tested) -* **false positive**: alerts were wrongly - validated (patient tested, shoud not have been) -* **false negative**: alerts were wrongly not - validated (patient not tested, should have been) - -**False positive** create a waste of resources as well as un-necessary pressure -on the patient and community. **False negative** create a risk of missing cases. - -For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". - - -**Note: All known Aire de Santes are reported in tables** -**the top 14 Aire de Santes are plotted when it comes to plotting over time** -**(due to space constraints).** - - - -## Overall - -### Weekly, since database start - -```{r decisions} - -ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar(color = "white") + - scale_x_discrete(drop = FALSE) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation - Mangina") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - scale_decisions + - rotate_x_text(45) + - scale_months - -``` - -### Table - weekly since database start - -``` {r table_decisions} - -table_decisions <- outcomes %>% - count(epiweek_report, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(c("row", "col")) %>% - mutate(prop_false_positive_95ci = - prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), - prop_false_negative_95ci = - prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), - prop_false_positive = prop_to_perc(false_positive / Total), - lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), - upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), - prop_false_negative = prop_to_perc(false_negative / Total), - lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), - upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) - -table_decisions %>% - select(-prop_false_positive, - -lower_false_positive, - -upper_false_positive, - -prop_false_negative , - -lower_false_negative, - -upper_false_negative - ) %>% - show_table() - -``` - - -### Table - incorrectly validated/not validated - -* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. -* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. - -```{r table_sens_spec} - -table_sens_spec <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - mutate(total_tested = true_positive + false_positive, - total_not_tested = true_negative + false_negative) %>% - mutate(incorrectly_validated_95ci = - prop_to_display_ci(false_positive, total_tested, - dec = 2, perc = TRUE), - incorrectly_not_validated_95ci = - prop_to_display_ci(false_negative, total_not_tested, - dec = 2, perc = TRUE)) -table_sens_spec %>% - show_table() - -``` - -`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. - -**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. - - - -## Overall past 3 weeks - -### Daily, past 3 weeks - -```{r decisions_time} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - theme(strip.text.y = element_text(size = 12, angle = 0)) + - scale_months + - rotate_x_text(45) + - large_txt + - scale_decisions + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - labs(title = paste("Nombre d'alertes par décision de validation - Mangina"), - subtitle = "Données des trois dernières semaines", - x = "", - y = "Nombre d'alertes par jour") + - scale_weeks - -``` - -### Over time table - -``` {r table_decisions_recent} - -table_decisions_recent <- incidence(outcomes_recent$date, "day", - groups = outcomes_recent$decision_comparison) %>% - as.data.frame() %>% - adorn_totals(where = c("row", "col")) - -table_decisions_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total since database start - -``` {r health_zone_total_decision} - -ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision et zone de santé - Mangina") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt + - rotate_x_text(45) - -``` - -### Table - Total since database start - -```{r table_hz_total_decisions, fig.keep = "all"} - -table_hz_total_decisions <- outcomes %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_zone_time_decision, fig.width = 12, fig.height = 8} - -ggplot(outcomes, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 7, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_months - -``` - -### Table - weekly since database start - -```{r table_hz_over_time_decisions, fig.keep = "all"} - -table_hz_over_time_decisions <- outcomes %>% - count(epiweek_report, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_decisions %>% - show_table() - -``` - - -## Health Zone - past 3 weeks - -### Total past 3 weeks - -``` {r health_zone_total_recent_decision_recent} - -ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + - geom_bar() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision et zone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - large_txt - -``` - -### Table - total past 3 weeks - -```{r table_hz_total_recent_decisions, fig.keep = "all"} - -table_hz_total_recent_decisions <- outcomes_recent %>% - count(top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_total_recent_decisions %>% - show_table() - -``` - -### Daily past 3 weeks - -```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} - -ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + - geom_histogram(binwidth = 1, col = "white") + - facet_wrap( ~ top_zones, scale = "free_y") + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation ", - "et zone de sante - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - guides(fill=guide_legend(ncol=2)) + - scale_weeks - -``` - -### Table - daily past 3 weeks - -```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} - -table_hz_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_zones, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_hz_over_time_recent_decisions %>% - show_table() - -``` - - - -## Health Area - -### Total since database start - -Graphs are split by greater or less than 100 alerts removed all are kept in table. - -```{r health_area_total_decisionsgreater100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Mangina", - subtitle = "avec plus de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_decisionsless100, fig.width = 14} - -x_decisions <- outcomes %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_100) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = "Nombre d'alertes par décision de validation et aire de santé - Mangina", - subtitle = "avec moins de 100 alertes") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` -### Table - total since database start - - -```{r table_ha_total_decision, fig.keep = "all"} - -table_ha_total_decisions <- outcomes %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_decisions %>% - show_table() - -``` - -### Weekly since database start - -```{r health_area_time_decision, fig.width = 12, fig.height = 8} - -x_decisions <- outcomes %>% - filter(top_aires != "other") - -ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par semaine", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de sante - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - -### Table - weekly since database start - -```{r table_ha_over_time_decisions, fig.keep = "all"} - -table_ha_over_time_decisions <- outcomes %>% - count(epiweek_report, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_decisions %>% - show_table() - -``` - - - -## Health Area - past 3 weeks - -### Total past 3 weeks - -Graphs are split by greater or less than 20 alerts removed all are kept in table. - -```{r health_area_total_recent_decision_greater20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% greater_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\n(avec plus de 20 alertes) - Mangina"), - subtitle = "Données des trois dernières semaines ") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -```{r health_area_total_recent_decision_less20, fig.width = 14} - -x_decisions <- outcomes_recent %>% - count(decision_comparison, zone_de_sante, aire_de_sante) %>% - filter(aire_de_sante %in% less_20_recent) - -ggplot(x_decisions, - aes(x = aire_de_sante, y = n, fill = decision_comparison)) + - geom_col() + - scale_x_discrete(drop = FALSE) + - scale_decisions + - facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes", - title = paste("Nombre d'alertes par décision de validation et aire de santé", - "\n(avec moins de 20 alertes) - Mangina"), - subtitle = "Données des trois dernières semaines ") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - -### Table - total past 3 weeks - -```{r table_ha_total_recent_decisions, fig.keep = "all"} - -table_ha_total_recent_decisions <- outcomes_recent %>% - count(aire_de_sante, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_total_recent_decisions %>% - show_table() - -``` - -### Daily, past 3 weeks - -```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} - -x_decisions_recent <- outcomes_recent %>% - filter(top_aires != "other") - -ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + - geom_bar() + - facet_wrap( ~ top_aires) + - scale_decisions + - labs(x = "", - y = "Nombre d'alertes par jour", - title = paste0("Nombre d'alertes par décision de validation", - " et aire de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - -### Table - daily past 3 weeks - -```{r table_ha_over_time_recent_decisions} - -table_ha_over_time_recent_decisions <- outcomes_recent %>% - count(date, top_aires, decision_comparison) %>% - spread(decision_comparison, n, fill = 0) %>% - adorn_totals(where = c("row", "col")) - -table_ha_over_time_recent_decisions %>% - show_table() - -``` - - - - - - -# Alert Rates {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. - -**Note:** Rate are only calculated for areas with population data. There -are `r length(non_pop_data)` Aire de Santes without population data, which totals - **`r alerts_no_pop`** alerts. - -**Note: All known Aire de Santes are reported in tables** - - - -```{r reactivate_alert_rates, include = FALSE} - -if(allNA(x$as_population)){ - knitr::opts_chunk$set(eval = FALSE) - no_pop_data <- TRUE - -} else{ - cat("There is no population data therefore alert rates can not be calculated.") -} - -``` - - - -## Overall - -### Weekly, Since Database Start - -```{r alert_rates} - -x_alert_rate_sous_co <- x %>% - count(epiweek_report_label, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, - aes(x = epiweek_report_label, y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_months - -``` - - -### Table - Weekly Since Database Start - -```{r table_alert_rates_per_week, fig.keep = "all"} - -table_alert_rates_per_week <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = statut_inconnu + invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_per_week %>% - show_table() - -``` - - - -## Overall Past 3 Weeks - -### Daily, Past 3 Weeks - -``` {r alert_rates_recent} - -x_alert_rate_sous_co <- x_recent %>% - count(date, status, total_population) %>% - mutate(alert_rate = 10000 * n / total_population) - -ggplot(data = x_alert_rate_sous_co, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - -### Table - Weekly Since Database Start - -```{r table_alert_rates_recent, fig.keep = "all"} - -table_alert_rates_recent <- x_alert_rate_sous_co %>% - select(-alert_rate) %>% - spread(status, n, fill = 0) %>% - mutate(total = invalidee + validee, - alert_rate = 10000 * total / total_population, - alert_rate = round(alert_rate, 4)) - -table_alert_rates_recent %>% - show_table() - -``` - - - -## Health Zone - -### Total, Since Database Start - -``` {r alert_rate_zone_de_sante_total_validation_status} - -date_span <- database_date - min(x$date, na.rm = TRUE) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(status, zone_de_sante, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - zs_population > 0) %>% - count(epiweek_report_label, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation, et zone de santé - Mnagina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - - -## Health Zone Past 3 Weeks - -### Total, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - as_population > 0, - top_zones != "other") %>% - count(status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / (zs_population * 3)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - theme(legend.position = "bottom") - - -``` - -### Daily, Past 3 Weeks - -``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} - -x_alert_rate_zone_sante <- x_recent %>% - filter( - zs_population > 0, - top_zones != "other") %>% - count(date, status, top_zones, zs_population) %>% - mutate(alert_rate = 10000 * n / zs_population) - -ggplot(data = x_alert_rate_zone_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_zones) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut\n", - "de validation et zone de santé - Mangina"), -subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_weeks - -``` - - - -## Health Area - -### Total, Since Database Start - -``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} - -date_span <- database_date - min(x$date) -number_weeks <- as.numeric(date_span/7) - -x_alert_rate_zone_sante <- x %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, top_zones, aire_de_sante, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population * number_weeks)) - -ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par semaine \net par 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut ", - "de validation et aire de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - - -``` - -### Weekly, Since Database Start - -``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x %>% - filter(top_aires != "other", - as_population > 0) %>% - count(epiweek_report_label, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / as_population) - -ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par semaine et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé - Mangina")) + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "2 month", date_labels = "%b") - -``` - - -## Health Area - Past 3 Weeks - -``` {r alert_rate_as_3_weeks, fig.width = 14} - -x_alert_rate_aire_sante_recent <- x_recent %>% - filter(top_zones != "other", - as_population > 0) %>% - count(status, aire_de_sante, top_zones, as_population) %>% - mutate(alert_rate = (10000 * n) / (as_population * 3)) - -ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, - y = alert_rate, fill = status)) + - geom_col() + - scale_validations + - facet_grid(. ~ top_zones, scales = "free_x", space = "free") + - theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), - strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + - labs(x = "", - y = "Nombre d'alertes par 10000 \npersonnes et par semaine", - title = paste0("Nombre d'alertes par 10000 personnes - Mangina"), - subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") - -``` - - -### Daily, Past 3 Weeks - -``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} - -x_alert_rate_aire_sante <- x_recent %>% - filter( - top_aires != "other", - as_population > 0) %>% - count(date, status, top_aires, as_population) %>% - mutate(alert_rate = 10000 * n / (as_population)) - -ggplot(data = x_alert_rate_aire_sante, aes(x = date, - y = alert_rate, fill = status)) + - geom_col() + - facet_wrap( ~ top_aires) + - scale_validations + - labs(x = "", - y = "Nombre d'alertes par jour et \npar 10,000 personnes", - title = paste0("Nombre d'alertes par 10000 personnes par statut", - " de validation et aire de santé - Mangina"), - subtitle = "Données des trois dernières semaines") + - large_txt + - rotate_x_text(45) + - theme(legend.position = "bottom") + - scale_x_date(date_breaks = "1 week", date_labels = "%d %b") - -``` - - - - - -# Final status of alerts {.tabset .tabset-fade .tabset-pills} - -## Outline - -This section investigates information on the proportions of false positive and false negative that became real cases. - - -## Overall final status by status decision comparison - -###Absolute number of alerts by final status and status decision comparison - -```{r final_status} - -outcomes %>% - filter(!is.na(final_outcome)) %>% -ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_x", space = "free")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -###Percentage of final status by alert status decision - -```{r final_status_percentage} - -perc_final_outcome <- outcomes %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + non_cas + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_non_cas = prop_to_perc(non_cas/total), - perc_suspect = prop_to_perc(suspect/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - -perc_final_outcome %>% -ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -##Final status for the past 21 days - -###Absolute number of alerts by final status and status decision comparison for the past 21 days - -```{r final_status_recent} - - -outcomes_recent %>% - filter(!is.na(final_outcome)) %>% -ggplot(aes(x = decision_comparison, fill = final_outcome)) + - geom_bar() + - scale_final_outcome + - scale_x_discrete(drop = FALSE) + - facet_grid(.~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Nombre total d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des trois dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - - -``` - - -###Percentage of final status by alert status decision for the past 21 days - -```{r final_status_percentage_recent} - -perc_final_outcome_recent <- outcomes_recent %>% - filter(!is.na(final_outcome)) %>% - count(top_zones, decision_comparison, final_outcome) %>% - spread(final_outcome, n, fill = 0) %>% - mutate(total= confirme + suspect, - perc_confirme = prop_to_perc(confirme/total), - perc_suspect = prop_to_perc(suspect/total)) %>% - pivot_longer(cols = starts_with("perc"), - names_to = "Proportion") - - - -perc_final_outcome_recent %>% -ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + - geom_col(color = "white") + - scale_final_outcome_perc + - scale_x_discrete(drop = FALSE) + - facet_grid(. ~ top_zones , scales = "free_y")+ - guides(fill = guide_legend(ncol = 2)) + - labs(x = "", - y = "Pourcentage d'alertes\n", - title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", - subtitle= "Données des 3 dernières semaines") + - theme(legend.position = "bottom") + - large_txt + - rotate_x_text(45) - -``` - - -### Table - final status overall - -```{r table_final_status, fig.keep = "all"} - -perc_final_outcome %>% - show_table() - -``` - - - -### Table - final status for the past 21 days - -```{r table_final_status_recent, fig.keep = "all"} - -perc_final_outcome_recent %>% - show_table() - -``` - - - - - -# Export data and tables {.tabset .tabset-fade .tabset-pills} - - -```{r reactivate_recent, include = FALSE} -knitr::opts_chunk$set(eval = TRUE) -``` - - -## Outline - -We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current -working directory. - - - - -## Export clean data - -We export some of the clean database, placed in `produced_rds/` as well as in -`data/clean/`: - -```{r export_rds, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -## create the text for the file name with the database date -rds_file_name <- sprintf("%sclean_%s.rds", - undated_file_name(current_mangina), - format(database_date, "%Y-%m-%d")) -rds_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_rds", rds_file_name)) - -``` - -We copy these files to the `data/clean` folder: - -```{r copy_rds, eval = TRUE} -# copy some files into `data/clean/` - -if (!dir.exists("data/clean")) { - dir.create("data/clean") -} - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - rds_file_name) -# Copy the rds data -file.copy(from = file.path("produced_rds", rds_file_name), - to = destination, - overwrite = TRUE) - -``` - - - - -## Excel files - -The following code exports all tables named in `to_report` to `xslx` files, -stored inside the folder `produced_xlsx`: - -### Cleaned alerts database - -```{r exports_tables, eval = TRUE} - -cleaned_alerts_database_mangina <- x - -to_export <- c("cleaned_alerts_database_mangina", - "table_validation_overall_time", - "table_validation_overall_past_3_weeks", - "table_hz_total_validation", - "table_hz_over_time_validation", - "table_hz_total_recent_validation", - "table_hz_over_time_recent_validation", - "table_ha_total_validation", - "table_ha_over_time_validation", - "table_ha_total_recent_validation", - "table_ha_over_time_recent_validation", - "table_origins", - "table_origins_recent", - "table_hz_total_origins", - "table_hz_over_time_origins", - "table_hz_total_recent_origins", - "table_hz_over_time_recent_origins", - "table_ha_total_origins", - "table_ha_over_time_origins", - "table_ha_total_recent_origins", - "table_ha_over_time_recent_origins", - "table_decisions", - "table_sens_spec", - "table_decisions_recent", - "table_hz_total_decisions", - "table_hz_over_time_decisions", - "table_hz_total_recent_decisions", - "table_hz_over_time_recent_decisions", - "table_ha_total_decisions", - "table_ha_over_time_decisions", - "table_ha_total_recent_decisions", - "table_ha_over_time_recent_decisions", - "perc_final_outcome", - "perc_final_outcome_recent", - "table_unknown_as") - -``` - -```{r xlsx_exports, eval = TRUE} - -## check if a directory exists and if not then creates it -if (!dir.exists("produced_xlsx")) { - dir.create("produced_xlsx") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_xlsx", - paste0(e, ".xlsx"))) -} - -``` - - -We copy the main data file to the `data/clean` folder: - -```{r export_xlsx, eval = TRUE} - -## create the text for the file name with the database date -xlsx_file_name <- sprintf("%sclean_%s.xlsx", - undated_file_name(current_mangina), - format(database_date, "%Y-%m-%d")) -xlsx_file_name - -## save the rds file in the produced_rds folder -rio::export(x, - file.path("produced_xlsx", xlsx_file_name)) - -``` - - -```{r copy_xlsx, eval = TRUE} -# copy some files into `data/clean/` - -# Provide the destination of where to copy the data -destination <- here("data", - "clean", - xlsx_file_name) -# Copy the rds data -file.copy(from = file.path("produced_xlsx", xlsx_file_name), - to = destination, - overwrite = TRUE) - -``` - - - -Click on the following links to open the files (only works if the files above -have been generated and are in the same folder as this document): - - -```{r xlsx_links, results = "asis", eval = TRUE} - - -for (e in to_export) { - txt <- sprintf("- [%s.xlsx](%s.xlsx)", - e, - file.path("produced_xlsx", - e)) - cat(txt, sep = "\n") -} - -``` - - - -## R objects - -The following code exports all tables named in `to_report` to `rds` files, -stored inside the folder `produced_rds`: - -```{r rds_exports, eval = TRUE} - -if (!dir.exists("produced_rds")) { - dir.create("produced_rds") -} - -for (e in to_export) { - rio::export(get(e), - file.path("produced_rds", - paste0(e, ".rds"))) -} - -``` - - - - - -# System information {.tabset .tabset-fade .tabset-pills} - - -## Outline - -The following information documents the system on which the document was -compiled. - - - -## System - -This provides information on the operating system. - -```{r system_info} -Sys.info() -``` - - -## R environment - -This provides information on the version of R used: - -```{r R_session} -R.version -``` - - - -## R packages - -This provides information on the packages used: - -```{r R_pkg} -sessionInfo() -``` - - -## Compilation parameters - -This shows which parameters were passed through `params` at compilation time: - -```{r params} -params -``` +--- +title: "Investigation of alerts data: Mangina" +author: "Charlie Whittaker, Christopher Jarvis, Thibaut Jombart, Flavio Finger, Jonathan Polonsky, Patrick Keating, Amy Gimma, Aminata Ndiaye, and Emma Glennon for the analytic cell OEC Goma" +date: "`r format(Sys.time(), '%A %d %B %Y')`" +output: + html_document: + code_folding: hide + highlight: zenburn + number_sections: yes + theme: spacelab + toc: yes + toc_collapse: no + toc_depth: 1 + toc_float: yes + css: !expr here::here('css', 'style.css') +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(echo = TRUE, + eval = TRUE, + collapse = TRUE, + fig.width = 8, + fig.height = 6, + dpi = 150, + warning = FALSE, + message = FALSE, + fig.path = "figures/") +``` + + +
+ +**Maintainer:** Christopher Jarvis (christopher.jarvis@lshtm.ac.uk) + +**Code contributors:** Chris Jarvis, Charlie Whittaker + +**Data contributors:** Surveillance team + +**Version:** 1.0.0 + +**Reviewed by:** Thibaut Jombart + +**Notice**: this is a **stable, routine report**. **Do not touch it unless it is +broken.** To make a contribution, carefully read the +[README](../../../../../README.html) file. + + + + + + + + +# Data preparation {.tabset .tabset-fade .tabset-pills} + + + +## Outline + +This report cleans and analyses the alert data of Mangina. Input comes from an +`xlsx` file containing alerts in a specific format. Because all +sub-coordinations have different standards, each sub-coordination needs a +separate report. + +The data preparation involves the following steps, detailed in the following +tabs: + +* **Load scripts**: loads libraries and useful scripts used in the analyses; all + `.R` files contained in `scripts` at the root of the factory are automatically + loaded + +* **Load data**: imports datasets, and may contain some *ad hoc* changes to the +data such as specific data cleaning (not used in other reports), new variables +used in the analyses, etc. + +* **Clean data**: this section contains *ad hoc* data cleaning, i.e. which is + not used in other reports (otherwise cleaning should be done in a dedicated + report); this section is also used to create new variables used in the + analyses + +## Load scripts + +These scripts will load: + +* all local scripts, stored as `.R` filesinside `/scripts/` +* all Overall scripts, i.e. stored outside the factory in `../scripts/` + +```{r read_scripts} + +## read scripts +path_to_scripts <- here::here("scripts") +scripts_files <- dir(path_to_scripts, pattern = ".R$", + full.names = TRUE) +for (file in scripts_files) source(file, local = TRUE) + +ggthemr("fresh") + +``` + + + +## Load alerts data + +We extract the completion date from the file name: + +```{r load_alerts_data} + +## load the data +current_mangina +x_raw <- custom_import(current_mangina) +glimpse(x_raw) + +## extract database date from the file name +file_name <- gsub("^[^.]+/", "", current_mangina) +database_date <- file_name %>% + guess_dates() +database_date + +``` + +The **completion date** of the database is **`r format(database_date, format = +"%A %d %b %Y")`**. + + +## Clean data + +We use *linelist*'s function `clean_data()` to: + +- remove all special characters from the data +- set all characters to lower case +- replace all accentuated and diacritic characters with their closest ascii + match in the latin alphabet +- replace all separators with a single `_` +- replace all mis-spelling using a Overall dictionary (see the file + `cleaning_rules.xlsx` in `/dictionary/`, or type `cleaning_rules` in this R + session, after compiling the document +- (optionally) detect date formats and convert data to dates, including cases + where format varies within a column + + +This cleaning is achieved with: + +```{r data_cleaning} + +x <- x_raw %>% + clean_data(guess_dates = FALSE, + wordlists = cleaning_rules) %>% + as_tibble() + +``` + + +## Renaming variables and sanity checks + +The following variables will be used, and are therefore checked: + +- `date`: check no date is after the database completion, filtered for NA values after running `guess_dates` +- sanitize `genre` +- make `origin` as a sanitized version of `source_notif` + +```{r check_variables} + +## date of notification +x <- x %>% mutate(date = guess_dates(date)) +range(x$date, na.rm = TRUE) + +# REVIEW: Remove NA dates +date_na <- sum(is.na(x$date)) +x <- x %>% filter(!is.na(date)) +# Check that na dates are removed + +## gender +table(x$sexe, useNA = "ifany") + +## contact connu +x <- mutate(x, contact_connu = lien) +table(x$contact_connu, useNA = "ifany") + +## zone de sante +x <- mutate(x, zone_de_sante = as.character(zs)) +table(x$zone_de_sante, useNA = "ifany") + +## aire de sante +x <- mutate(x, aire_de_sante = as.character(as)) +table(x$aire_de_sante, useNA = "ifany") + +## origin / source_notif +x <- mutate(x, origin = source_notif) +table(x$origin, useNA = "ifany") + +# create variable for alert validation +x <- x %>% mutate( + status = ifelse(conc_final %in% c("inconnu", "en_cours"), + "statut_inconnu", conc_final)) %>% + mutate(status = factor(status, levels = c("statut_inconnu", + "invalidee", + "validee"))) + +x$status[is.na(x$status)] <- "statut_inconnu" +table(x$status, useNA = "ifany") + +# another round of cleaning +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + + +There are `r length(date_na)` missing dates in the data. + + + +## Variable creation + +The following variables are created: + +- `bleed` - which describes whether the alert displayed any of the bleeding related +symptoms. +- `epiweek_report` & `epiweek_report_label` - which describe the epiweek the alert +was reported in. +- `top_zones` - which describes the 3 most active Zone de Santes (other Zone de +Santes) are grouped into "other". +- `top_aires` - which describes the 14 most active Aire de Santes (out of all the +legitimate Aire de Santes belonging to Mangina present in the database). Note - All +other Aire de Santes are aggregated into the category "other". + +- `final_outcome` - which describes the final status of the alert after a test has (most likely) been done. + + +Note - All other Aire de Santes are aggregated into the category "other". + +``` {r variable_creation} + +## bleed +bleed <- x %>% + select(contains("saignement")) %>% + apply(1, function(e) ifelse(any(e == "oui"), "oui", "non")) +x <- mutate(x, bleed = bleed) +table(x$bleed, useNA = "ifany") + +## epiweek report +x <- x %>% + mutate(epiweek_report = + aweek::date2week(date, week_start = "Monday", floor_day = TRUE)) %>% + mutate(epiweek_report_label = + aweek::week2date(date, week_start = "Monday", floor_day = TRUE)) + + + +x <- x %>% + mutate(top_zones = top_values(zone_de_sante, 5)) %>% + mutate(top_aires = top_values(aire_de_sante, 14)) + + +#create a variable for the final status of the alert +x <- x %>% + mutate(final_outcome = class_final) %>% + mutate(final_outcome = factor(final_outcome, levels = c("confirme", + "non_cas", + "suspect"))) +``` + +One last round of dictionary-based cleaning: + +```{r last_cleaning} + +x <- x %>% + clean_variable_spelling(wordlists = cleaning_rules) + +``` + +Remove data from before 2019-01-01 as not as reliable. + +```{r remove_pre_2019} + +x <- x %>% + filter(date >= "2019-01-01") + +``` + + +## Outcomes + +Outcome is defined for alerts which have either been validated or invalidated. + +```{r outcomes} + +## get only known outcomes +outcomes <- x %>% + filter(status %in% c("validee", "invalidee")) %>% + droplevels() %>% + mutate(validee = 1 * (status == "validee")) + +``` + + +## Check admin areas are correct + +```{r load_area_names_data} + +## load the data +current_spatial +area_names <- rio::import(current_spatial) %>% + clean_data(guess_dates = FALSE) + +as_names <- unique(area_names$as) +zs_names <- unique(area_names$zs) + +``` + +### Identify unknown areas + +```{r identify_area_names_data} + +## load the data +n_rows <- nrow(x) + +unknown_as <- x %>% + filter(!aire_de_sante %in% as_names) + +most_recent_unknown_as <- max(unknown_as$date, na.rm = TRUE) + +start_date <- database_date - 20 +unknown_as_3weeks <- x %>% + filter(!aire_de_sante %in% as_names, + date >= start_date) + +``` + + +There are `r nrow(unknown_as)` alerts with unknown aire de santes with the latest being `r most_recent_unknown_as`. There are `r nrow(unknown_as_3weeks)` alerts in the last 3 weeks with an unknown aire de santes in the database. + +```{r show_unknown_area_names} + +table_unknown_as <- unknown_as %>% + count(zone_de_sante, aire_de_sante) + +table_unknown_as %>% + show_table() + +``` + + +## Filter to known aire de santes + +```{r known_aire_de_sante} + +x <- x %>% + filter(aire_de_sante %in% as_names) + +``` + + + +## Add population data + +Load the population data + +```{r load_pop_data} + +## load the data +current_pop +pop_data <- rio::import(current_pop) +glimpse(pop_data) + +pop_data <- pop_data %>% + clean_data(guess_dates = FALSE) %>% + as_tibble() +``` + +### Check for area not in alerts + +```{r filter_by_alerts} + +## load the data +pop_data <- pop_data %>% + filter(aire_de_sante %in% x$aire_de_sante, + zone_de_sante %in% x$zone_de_sante) + +pop_data + +## aire de sante population +aire_de_sante_pop <- pop_data %>% + mutate(as_population = population) %>% + select(zone_de_sante, aire_de_sante, as_population) + +## zone de sante population +zone_de_sante_pop <- pop_data %>% + group_by(zone_de_sante) %>% + summarise(zs_population = sum(population, na.rm = TRUE)) + +total_population <- sum(unique(pop_data$population), na.rm = TRUE) + +## adding populations to main data +x <- x %>% + left_join(aire_de_sante_pop, + by = c("aire_de_sante", "zone_de_sante")) %>% + left_join(zone_de_sante_pop, + by = c("zone_de_sante")) + +if(!allNA(x$as_population)){ + x <- x %>% + mutate(total_population = total_population) +} + + +``` + + + +## Check population linkage (review regularly) + +```{r check_population_linkage} + +# Calculating number of individuals successfully linked to population data +number_individuals <- nrow(x) +number_individuals_aire_linked <- sum(!is.na(x$as_population)) +prop_linked <- sum(number_individuals_aire_linked / number_individuals) + +# Compare these two quantities - ensure there aren't any mispellings we're missing +table(x$aire_de_sante[is.na(x$as_population)]) # aire de santes in database we're not matching + +# Identifying Aire de Santes with missing population data +mangina_ha_pop <- pop_data$population +index <- which(is.na(mangina_ha_pop)) +non_pop_data <- pop_data$aire_de_sante[index] + +# Identifying number of alerts linked to Aire de Sante with +# missing population data +alerts_no_pop <- sum(x$aire_de_sante %in% non_pop_data) + +``` + +There are `r length(non_pop_data)` aire_de_santes without population data. Rate will no be calculated for these areas. + + +## Expected decisions + +Alerts are supposed to be validated using the following key, depending on +whether the person has: + +1. **known contact** with a case and at least **one symptom** +2. if not 1, **unexplained bleeding** +3. if not 2, **fever and 3 other symptoms** + +Note that 3 de facto excludes bleeding as a candidate symptom. We create a new +variable which asserts these conditions: + +```{r expected_result} + +## elements of diagnostic +## criteria 1 +has_contact <- outcomes$contact_connu %in% c("confirme", "oui") +has_one_symptom <- outcomes %>% + select(fievre, + naus, + diarr, + asth, + anor, + abdo, + thor, + musc, + arti, + ceph, + toux, + resp, + aval, + gor, + hoq, + bleed) %>% + apply(1, function(e) any(e == "oui", na.rm = TRUE)) +fits_1 <- has_contact & has_one_symptom + +## criteria 2 +has_bleeding <- outcomes$bleed == "oui" +fits_2 <- !fits_1 & has_bleeding + +## criteria 3 +has_fever <- outcomes$fievre == "oui" +has_3_symptoms <- outcomes %>% + select(naus, + diarr, + asth, + anor, + abdo, + thor, + musc, + arti, + ceph, + toux, + resp, + aval, + gor, + hoq, + bleed) %>% + apply(1, function(e) sum(e == "oui", na.rm = TRUE)) >= 3 +fits_3 <- !fits_1 & !fits_2 & has_fever & has_3_symptoms + + +## make sure criteria are exclusive - no number in the table below should exceed +## 1. i.e this table should only zero or one as categories. +table(fits_1 + fits_2 + fits_3) + +## expected decisions +outcomes <- outcomes %>% + mutate(admit_contact = fits_1, + admit_bleeding = fits_2, + admit_fever = fits_3) %>% + mutate(expected_decision = ifelse( + fits_1 | fits_2 | fits_3, + "validee", + "invalidee"), + decision_comparison = case_when( + status == "validee" & expected_decision == "validee" ~ + "true_positive", + status == "invalidee" & expected_decision == "invalidee" ~ + "true_negative", + status == "validee" & expected_decision == "invalidee" ~ + "false_positive", + status == "invalidee" & expected_decision == "validee" ~ + "false_negative", + TRUE ~ NA_character_ + ), + decision_comparison = + factor(decision_comparison, + levels = c("true_positive", + "true_negative", + "false_positive", + "false_negative"))) + +## remove alerts with missing comparisons, from 2019 +outcomes <- outcomes %>% + filter(!is.na(decision_comparison), + date >= as.Date("2019-01-01")) + + +## check that classification is well-made +outcomes %>% + group_by(status, expected_decision, decision_comparison) %>% + count() + + +``` + + + +## Last 21 days + +We duplicate the previous datasets, retaining the 21 days leading up to the +current database date. + +```{r subset_21_days} + +start_date <- database_date - 21 +x_recent <- filter(x, date > start_date) +outcomes_recent <- filter(outcomes, date > start_date) + +``` + + +## Alerts counts split + +Calculate number of alerts greater than or equal to 100 or less than 100 over all time. Greater than or less than 20 for recent. There are many aire de santes and this helps to restrict what can be seen. + +```{r alert_count_split} + +greater_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 100) %>% + pull(aire_de_sante) + +less_100 <- x %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 100) %>% + pull(aire_de_sante) + +less_100[less_100 %in% greater_100] <- NA +less_100 <- less_100[complete.cases(less_100)] + +greater_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n >= 20) %>% + pull(aire_de_sante) +less_20_recent <- x_recent %>% count(zone_de_sante, + aire_de_sante) %>% + filter(n < 20) %>% + pull(aire_de_sante) + +less_20_recent[less_20_recent %in% greater_20_recent] <- NA +less_20_recent <- less_20_recent[complete.cases(less_20_recent)] + +``` + + + + +## Custom color scales + +We define custom colors for some of the variables used in the plots. + +```{r scales_fill} + +scale_origins <- scale_fill_manual( + "Origine", + values = c(communautaire = "#ffcc00", + recherche_active = "#c3c388", + surveillance_passive = "#ff6699", + point_entree = "#40bf80", + autre = "#668cff", + inconnu = "grey", + check_cleaning_rules = "grey")) + +scale_decisions <- scale_fill_manual( + "Décisions", + values = c(true_positive = "#94b8b8", + true_negative = "#8c8cd9", + false_positive = "#ff8080", + false_negative = "#b3003b"), + labels = c(true_positive = "Validation correcte", + true_negative = "Invalidation correcte", + false_positive = "Fausse alerte", + false_negative = "Alerte manquée")) + +scale_validations <- scale_fill_manual( + "Outcome", + values = c(statut_inconnu = "#BCB4A4", + validee = "#D56F3E", + invalidee = "#F2C69B"), + labels = c(validee = "Validée", + invalidee = "Invalidée", + statut_inconnu = "Statut Inconnu")) + + +scale_final_outcome <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(confirme = "#ff9999", + suspect = "#264d73", + non_cas = "#8cb3d9"), + labels = c(confirme = "Cas confirme", + suspect= "Cas suspect", + non_cas = "Non-cas" + + )) + +scale_final_outcome_perc <- scale_fill_manual( + name = "Statut final de l'alerte", + values = c(perc_confirme = "#ff9999", + perc_suspect = "#264d73", + perc_non_cas = "#8cb3d9"), + labels = c(perc_confirme = "Cas confirme", + perc_suspect= "Cas suspect", + perc_non_cas = "Non-cas" + + )) + + +``` + + + + + + +# Validation status {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their validation status i.e. whether they were validated or invalidated. + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + +## Overall + +### Weekly, since database start + +```{r sous_coord_time} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation - Mangina")) + + large_txt + + rotate_x_text(45) + + scale_months + + theme(legend.position = "bottom") + +``` + + +### Table - weekly since database start + +```{r sous_coord_time_table, fig.keep = "all"} + +table_validation_overall_time <- x %>% + count(epiweek_report, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_time %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r sous_coord_recent} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + scale_weeks + + theme(legend.position = "bottom") + +``` + +### Table - daily past 3 weeks + +```{r sous_coord_recent_table, fig.keep = "all"} + +table_validation_overall_past_3_weeks <- x_recent %>% + count(date, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_validation_overall_past_3_weeks %>% + show_table() + +``` + + +## Overall proportion validated + +```{r sous_co_proportion_validated} + +x_prop <- x %>% + count(epiweek_report_label, status) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + p = prop_to_perc(validee / total), + lci = prop_ci(validee, total, "lower", TRUE), + uci = prop_ci(validee, total, "upper", TRUE)) %>% + select(epiweek_report_label, p, lci, uci) + +ggplot(x_prop, aes(x = epiweek_report_label, col = "#D56F3E")) + + geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) + + geom_point(aes(y = p), size = 2) + + geom_line(aes(y = p), size = 1) + + scale_color_discrete(guide = FALSE) + + ylim(c(0, 100)) + + labs(x = "", + y = "Pourcentage d'alertes validées \npar semaine", + title = paste0("Proportion d'alertes validées - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total} + +ggplot(x, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et \nzone de santé - Mangina") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_validation, fig.keep = "all"} + +table_hz_total_validation <- x %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = status)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation ", + " et zone de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_validation, fig.keep = "all"} + +table_hz_over_time_validation <- x %>% + count(epiweek_report, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_validation %>% + show_table() + +``` + + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent} + +ggplot(x_recent, aes(x = top_zones, fill = status)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation", + "et \nzone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_validation, fig.keep = "all"} + +table_hz_total_recent_validation <- x_recent %>% + count(top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_validation %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = status)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation ", + "et zone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_validation, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_validation <- x_recent %>% + count(date, top_zones, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_validation %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table + +```{r health_area_total_greater100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Mangina", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_less100, fig.width = 14} + +x_validations <- x %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par statut de validation et aire de santé - Mangina", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total since database start + +```{r table_ha_total_validation, fig.keep = "all"} + +table_ha_total_validation <- x %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_validation %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = epiweek_report_label, fill = status)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_validation, fig.keep = "all"} + +table_ha_over_time_validation <- x %>% + count(epiweek_report, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_validation %>% + show_table() + +``` + + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table + +```{r health_area_total_recent_greater20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé ", + "\n(avec plus de 20 alertes) - Mangina"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_recent_less20, fig.width = 14} + +x_validations <- x_recent %>% + count(status, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + + +ggplot(x_validations, aes(x = aire_de_sante, y = n, fill = status)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_validations + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par statut de validation et aire de santé", + "\n(avec moins de 20 alertes) - Mangina"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - total past 3 weeks + +```{r table_ha_total_recent_validation, fig.keep = "all"} + +table_ha_total_recent_validation <- x_recent %>% + count(aire_de_sante, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_validation %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent, fig.width = 12, fig.height = 8} + +x_validations <- x_recent %>% + filter(top_aires != "other") %>% + count(date, status, top_aires) + +ggplot(x_validations, aes(x = date, y = n, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par statut de validation", + " et aire de sante - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_validation , fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_validation <- x_recent %>% + count(date, top_aires, status) %>% + spread(status, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_validation %>% + show_table() + +``` + + + + + +# Origins {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section summarises alerts over time according to their origin + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + + +## Overall + +### Weekly, since database start + +```{r sous_coord_origins} + +ggplot(x, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = "Nombre d'alertes par origine - Mangina") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +```{r sous_coord_time_table_origins, fig.keep = "all"} + +table_origins <- x %>% + count(epiweek_report, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins %>% + show_table() + +``` + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r origins_time_recent} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + rotate_x_text(45) + + large_txt + + theme(legend.position = "bottom", + panel.spacing.y = unit(1, "lines")) + + labs(title = "Nombre d'alertes par origine - Mangina", + subtitle= "Données des trois dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_origins_recent, fig.keep = "all"} + +table_origins_recent <- x_recent %>% + count(date, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_origins_recent %>% + show_table() + +``` + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_origin} + +ggplot(x, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et zone de santé - Mangina") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + +### Table - total since database start + +```{r table_hz_total_origins, fig.keep = "all"} + +table_hz_total_origins <- x %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_origin, fig.width = 12, fig.height = 8} + +ggplot(x, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + "et zone de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_origins, fig.keep = "all"} + +table_hz_over_time_origins <- x %>% + count(epiweek_report, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_origins %>% + show_table() + +``` + + +## Health Zone - Past 3 Weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_origin_recent} + +ggplot(x_recent, aes(x = top_zones, fill = origin)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_origins + + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et zone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_origins, fig.keep = "all"} + +table_hz_total_recent_origins <- x_recent %>% + count(top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_origins %>% + show_table() + +``` + + +### Daily past 3 weeks + +```{r health_zone_time_recent_origins, fig.width = 12, fig.height = 8} + +ggplot(x_recent, aes(x = date, fill = origin)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + "et zone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_origins <- x_recent %>% + count(date, top_zones, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_origins %>% + show_table() + +``` + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_origins_greter100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé -Mangina", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +```{r health_area_total_origins_less100, fig.width = 14} + +x_origins <- x %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par origine et aire de santé - Mangina", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total since database start + + +```{r table_ha_total_origins, fig.keep = "all"} + +table_ha_total_origins <- x %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_origins %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_origin, fig.width = 12, fig.height = 8} + +x_origins <- x %>% + filter(top_aires != "other") + +ggplot(x_origins, aes(x = epiweek_report_label, fill = origin)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par origine", + " et zone de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_origins, fig.keep = "all"} + +table_ha_over_time_origins <- x %>% + count(epiweek_report, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_origins %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +``` {r health_area_total_recent_origin_greater20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "\n(avec plus de 20 alertes) - Mangina"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +``` {r health_area_total_recent_origin_less20, fig.width = 14} + +x_origins <- x_recent %>% + count(origin, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_origins, aes(x = aire_de_sante, y = n, fill = origin)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_origins + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par origine et aire de santé", + "\n(avec moins de 20 alertes) - Mangina"), + subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + + +### Table - Total past 3 weeks + +```{r table_ha_total_recent_origins, fig.keep = "all"} + +table_ha_total_recent_origins <- x_recent %>% + count(aire_de_sante, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_origins %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_origin_recent, fig.width = 12, fig.height = 8} + +x_origins_recent <- x_recent %>% + filter(top_aires != "other") %>% + count(date, origin, top_aires) + +ggplot(x_origins_recent, aes(x = date, y = n, fill = origin)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_origins + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par origine", + " et aire de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_origins, fig.width = 12, fig.height = 5} + +table_ha_over_time_recent_origins <- x_recent %>% + count(date, top_aires, origin) %>% + spread(origin, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_origins %>% + show_table() + +``` + + + + + +# Validation decisions {.tabset .tabset-fade .tabset-pills} + + +## Outline + +This section focuses on the decision of alerts, by health zones. + +When looking at decisions on the treatment of alerts, we identify the 4 +following situations: + +* **true positive**: alerts were rightfully + validated (patient tested) +* **true negative**: alerts were rightfully not + validated (patient not tested) +* **false positive**: alerts were wrongly + validated (patient tested, shoud not have been) +* **false negative**: alerts were wrongly not + validated (patient not tested, should have been) + +**False positive** create a waste of resources as well as un-necessary pressure +on the patient and community. **False negative** create a risk of missing cases. + +For better communication purposes, false-positives will be called "Fausse alerte" and false-negatives will be called "Alerte manquée". + + +**Note: All known Aire de Santes are reported in tables** +**the top 14 Aire de Santes are plotted when it comes to plotting over time** +**(due to space constraints).** + + + +## Overall + +### Weekly, since database start + +```{r decisions} + +ggplot(outcomes, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar(color = "white") + + scale_x_discrete(drop = FALSE) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation - Mangina") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + scale_decisions + + rotate_x_text(45) + + scale_months + +``` + +### Table - weekly since database start + +``` {r table_decisions} + +table_decisions <- outcomes %>% + count(epiweek_report, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(c("row", "col")) %>% + mutate(prop_false_positive_95ci = + prop_to_display_ci(false_positive, Total, dec = 2, perc = TRUE), + prop_false_negative_95ci = + prop_to_display_ci(false_negative, Total, dec = 2, perc = TRUE), + prop_false_positive = prop_to_perc(false_positive / Total), + lower_false_positive = prop_ci(false_positive, Total, "lower", TRUE), + upper_false_positive = prop_ci(false_positive, Total, "upper", TRUE), + prop_false_negative = prop_to_perc(false_negative / Total), + lower_false_negative = prop_ci(false_negative, Total, "lower", TRUE), + upper_false_negative = prop_ci(false_negative, Total, "upper", TRUE)) + +table_decisions %>% + select(-prop_false_positive, + -lower_false_positive, + -upper_false_positive, + -prop_false_negative , + -lower_false_negative, + -upper_false_negative + ) %>% + show_table() + +``` + + +### Table - incorrectly validated/not validated + +* Individuals incorrectly validated. They were validated and tested but did not meet the case definition. +* Individuals incorrectly not validated. They were not validated and tested but did meet the case definition. + +```{r table_sens_spec} + +table_sens_spec <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + mutate(total_tested = true_positive + false_positive, + total_not_tested = true_negative + false_negative) %>% + mutate(incorrectly_validated_95ci = + prop_to_display_ci(false_positive, total_tested, + dec = 2, perc = TRUE), + incorrectly_not_validated_95ci = + prop_to_display_ci(false_negative, total_not_tested, + dec = 2, perc = TRUE)) +table_sens_spec %>% + show_table() + +``` + +`incorrectly validated` refers to the proportion of validated alerts which should not have been validated. `incorrectly invalidated` refers to the proportion of invalidated alerts that should have been validated. + +**Note:** The `incorrectly validated` variable will be over-estimated if alerts forms are poorly documented/incomplete - i.e. if patients had symptoms not reported in the form but that were used in the decision making process. + + + +## Overall past 3 weeks + +### Daily, past 3 weeks + +```{r decisions_time} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + theme(strip.text.y = element_text(size = 12, angle = 0)) + + scale_months + + rotate_x_text(45) + + large_txt + + scale_decisions + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + labs(title = paste("Nombre d'alertes par décision de validation - Mangina"), + subtitle = "Données des trois dernières semaines", + x = "", + y = "Nombre d'alertes par jour") + + scale_weeks + +``` + +### Over time table + +``` {r table_decisions_recent} + +table_decisions_recent <- incidence(outcomes_recent$date, "day", + groups = outcomes_recent$decision_comparison) %>% + as.data.frame() %>% + adorn_totals(where = c("row", "col")) + +table_decisions_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total since database start + +``` {r health_zone_total_decision} + +ggplot(outcomes, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision et zone de santé - Mangina") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + + rotate_x_text(45) + +``` + +### Table - Total since database start + +```{r table_hz_total_decisions, fig.keep = "all"} + +table_hz_total_decisions <- outcomes %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_zone_time_decision, fig.width = 12, fig.height = 8} + +ggplot(outcomes, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 7, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_months + +``` + +### Table - weekly since database start + +```{r table_hz_over_time_decisions, fig.keep = "all"} + +table_hz_over_time_decisions <- outcomes %>% + count(epiweek_report, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_decisions %>% + show_table() + +``` + + +## Health Zone - past 3 weeks + +### Total past 3 weeks + +``` {r health_zone_total_recent_decision_recent} + +ggplot(outcomes_recent, aes(x = top_zones, fill = decision_comparison)) + + geom_bar() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision et zone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + large_txt + +``` + +### Table - total past 3 weeks + +```{r table_hz_total_recent_decisions, fig.keep = "all"} + +table_hz_total_recent_decisions <- outcomes_recent %>% + count(top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_total_recent_decisions %>% + show_table() + +``` + +### Daily past 3 weeks + +```{r health_zone_time_recent_decisions, fig.width = 12, fig.height = 8} + +ggplot(outcomes_recent, aes(x = date, fill = decision_comparison)) + + geom_histogram(binwidth = 1, col = "white") + + facet_wrap( ~ top_zones, scale = "free_y") + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation ", + "et zone de sante - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + guides(fill=guide_legend(ncol=2)) + + scale_weeks + +``` + +### Table - daily past 3 weeks + +```{r table_hz_over_time_recent_decisions, fig.width = 12, fig.height = 5} + +table_hz_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_zones, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_hz_over_time_recent_decisions %>% + show_table() + +``` + + + +## Health Area + +### Total since database start + +Graphs are split by greater or less than 100 alerts removed all are kept in table. + +```{r health_area_total_decisionsgreater100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Mangina", + subtitle = "avec plus de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_decisionsless100, fig.width = 14} + +x_decisions <- outcomes %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_100) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = "Nombre d'alertes par décision de validation et aire de santé - Mangina", + subtitle = "avec moins de 100 alertes") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` +### Table - total since database start + + +```{r table_ha_total_decision, fig.keep = "all"} + +table_ha_total_decisions <- outcomes %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_decisions %>% + show_table() + +``` + +### Weekly since database start + +```{r health_area_time_decision, fig.width = 12, fig.height = 8} + +x_decisions <- outcomes %>% + filter(top_aires != "other") + +ggplot(x_decisions, aes(x = epiweek_report_label, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par semaine", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de sante - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + +### Table - weekly since database start + +```{r table_ha_over_time_decisions, fig.keep = "all"} + +table_ha_over_time_decisions <- outcomes %>% + count(epiweek_report, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_decisions %>% + show_table() + +``` + + + +## Health Area - past 3 weeks + +### Total past 3 weeks + +Graphs are split by greater or less than 20 alerts removed all are kept in table. + +```{r health_area_total_recent_decision_greater20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% greater_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé", + "\n(avec plus de 20 alertes) - Mangina"), + subtitle = "Données des trois dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +```{r health_area_total_recent_decision_less20, fig.width = 14} + +x_decisions <- outcomes_recent %>% + count(decision_comparison, zone_de_sante, aire_de_sante) %>% + filter(aire_de_sante %in% less_20_recent) + +ggplot(x_decisions, + aes(x = aire_de_sante, y = n, fill = decision_comparison)) + + geom_col() + + scale_x_discrete(drop = FALSE) + + scale_decisions + + facet_grid(. ~ zone_de_sante, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes", + title = paste("Nombre d'alertes par décision de validation et aire de santé", + "\n(avec moins de 20 alertes) - Mangina"), + subtitle = "Données des trois dernières semaines ") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + + +``` + +### Table - total past 3 weeks + +```{r table_ha_total_recent_decisions, fig.keep = "all"} + +table_ha_total_recent_decisions <- outcomes_recent %>% + count(aire_de_sante, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_total_recent_decisions %>% + show_table() + +``` + +### Daily, past 3 weeks + +```{r health_area_time_recent_decision_recent, fig.width = 12, fig.height = 8} + +x_decisions_recent <- outcomes_recent %>% + filter(top_aires != "other") + +ggplot(x_decisions_recent, aes(x = date, fill = decision_comparison)) + + geom_bar() + + facet_wrap( ~ top_aires) + + scale_decisions + + labs(x = "", + y = "Nombre d'alertes par jour", + title = paste0("Nombre d'alertes par décision de validation", + " et aire de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + +### Table - daily past 3 weeks + +```{r table_ha_over_time_recent_decisions} + +table_ha_over_time_recent_decisions <- outcomes_recent %>% + count(date, top_aires, decision_comparison) %>% + spread(decision_comparison, n, fill = 0) %>% + adorn_totals(where = c("row", "col")) + +table_ha_over_time_recent_decisions %>% + show_table() + +``` + + + + + + +# Alert Rates {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section integrates information on the number of alerts with population data to estimate the number of validated/invalidated alerts per 10,000 people. + +**Note:** Rate are only calculated for areas with population data. There +are `r length(non_pop_data)` Aire de Santes without population data, which totals + **`r alerts_no_pop`** alerts. + +**Note: All known Aire de Santes are reported in tables** + + + +```{r reactivate_alert_rates, include = FALSE} + +if(allNA(x$as_population)){ + knitr::opts_chunk$set(eval = FALSE) + no_pop_data <- TRUE + +} else{ + cat("There is no population data therefore alert rates can not be calculated.") +} + +``` + + + +## Overall + +### Weekly, Since Database Start + +```{r alert_rates} + +x_alert_rate_sous_co <- x %>% + count(epiweek_report_label, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, + aes(x = epiweek_report_label, y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_months + +``` + + +### Table - Weekly Since Database Start + +```{r table_alert_rates_per_week, fig.keep = "all"} + +table_alert_rates_per_week <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = statut_inconnu + invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_per_week %>% + show_table() + +``` + + + +## Overall Past 3 Weeks + +### Daily, Past 3 Weeks + +``` {r alert_rates_recent} + +x_alert_rate_sous_co <- x_recent %>% + count(date, status, total_population) %>% + mutate(alert_rate = 10000 * n / total_population) + +ggplot(data = x_alert_rate_sous_co, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + +### Table - Weekly Since Database Start + +```{r table_alert_rates_recent, fig.keep = "all"} + +table_alert_rates_recent <- x_alert_rate_sous_co %>% + select(-alert_rate) %>% + spread(status, n, fill = 0) %>% + mutate(total = invalidee + validee, + alert_rate = 10000 * total / total_population, + alert_rate = round(alert_rate, 4)) + +table_alert_rates_recent %>% + show_table() + +``` + + + +## Health Zone + +### Total, Since Database Start + +``` {r alert_rate_zone_de_sante_total_validation_status} + +date_span <- database_date - min(x$date, na.rm = TRUE) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(status, zone_de_sante, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = zone_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_zone_de_sante_weekly_status, fig.height = 5, fig.width = 10} + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + zs_population > 0) %>% + count(epiweek_report_label, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation, et zone de santé - Mnagina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + + +## Health Zone Past 3 Weeks + +### Total, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_weekly_validation_status_recent} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + as_population > 0, + top_zones != "other") %>% + count(status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / (zs_population * 3)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = top_zones, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + theme(legend.position = "bottom") + + +``` + +### Daily, Past 3 Weeks + +``` {r alert_rate_zone_de_sante_daily_status, fig.height = 5, fig.width = 12} + +x_alert_rate_zone_sante <- x_recent %>% + filter( + zs_population > 0, + top_zones != "other") %>% + count(date, status, top_zones, zs_population) %>% + mutate(alert_rate = 10000 * n / zs_population) + +ggplot(data = x_alert_rate_zone_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_zones) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut\n", + "de validation et zone de santé - Mangina"), +subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_weeks + +``` + + + +## Health Area + +### Total, Since Database Start + +``` {r alert_rate_aire_de_sante_weekly_val_total, fig.width = 14} + +date_span <- database_date - min(x$date) +number_weeks <- as.numeric(date_span/7) + +x_alert_rate_zone_sante <- x %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, top_zones, aire_de_sante, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population * number_weeks)) + +ggplot(data = x_alert_rate_zone_sante, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par semaine \net par 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut ", + "de validation et aire de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + +``` + +### Weekly, Since Database Start + +``` {r alert_rate_aire_de_sante_over_time, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x %>% + filter(top_aires != "other", + as_population > 0) %>% + count(epiweek_report_label, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / as_population) + +ggplot(data = x_alert_rate_aire_sante, aes(x = epiweek_report_label, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par semaine et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé - Mangina")) + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "2 month", date_labels = "%b") + +``` + + +## Health Area - Past 3 Weeks + +``` {r alert_rate_as_3_weeks, fig.width = 14} + +x_alert_rate_aire_sante_recent <- x_recent %>% + filter(top_zones != "other", + as_population > 0) %>% + count(status, aire_de_sante, top_zones, as_population) %>% + mutate(alert_rate = (10000 * n) / (as_population * 3)) + +ggplot(data = x_alert_rate_aire_sante_recent, aes(x = aire_de_sante, + y = alert_rate, fill = status)) + + geom_col() + + scale_validations + + facet_grid(. ~ top_zones, scales = "free_x", space = "free") + + theme(strip.text.x = element_text(size = 12, angle = 0, color = "#5c8a8a"), + strip.background = element_rect(fill = "#e0ebeb", color = "#5c8a8a")) + + labs(x = "", + y = "Nombre d'alertes par 10000 \npersonnes et par semaine", + title = paste0("Nombre d'alertes par 10000 personnes - Mangina"), + subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + +``` + + +### Daily, Past 3 Weeks + +``` {r alert_rate_aire_de_sante_recent, fig.width = 12, fig.height = 8} + +x_alert_rate_aire_sante <- x_recent %>% + filter( + top_aires != "other", + as_population > 0) %>% + count(date, status, top_aires, as_population) %>% + mutate(alert_rate = 10000 * n / (as_population)) + +ggplot(data = x_alert_rate_aire_sante, aes(x = date, + y = alert_rate, fill = status)) + + geom_col() + + facet_wrap( ~ top_aires) + + scale_validations + + labs(x = "", + y = "Nombre d'alertes par jour et \npar 10,000 personnes", + title = paste0("Nombre d'alertes par 10000 personnes par statut", + " de validation et aire de santé - Mangina"), + subtitle = "Données des trois dernières semaines") + + large_txt + + rotate_x_text(45) + + theme(legend.position = "bottom") + + scale_x_date(date_breaks = "1 week", date_labels = "%d %b") + +``` + + + + + +# Final status of alerts {.tabset .tabset-fade .tabset-pills} + +## Outline + +This section investigates information on the proportions of false positive and false negative that became real cases. + + +## Overall final status by status decision comparison + +###Absolute number of alerts by final status and status decision comparison + +```{r final_status} + +outcomes %>% + filter(!is.na(final_outcome)) %>% +ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_x", space = "free")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision + +```{r final_status_percentage} +cas <- unique(outcomes$final_outcome) + +perc_final_outcome <- outcomes %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_non_cas = prop_to_perc(non_cas/total), + perc_suspect = prop_to_perc(suspect/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + +perc_final_outcome %>% +ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +##Final status for the past 21 days + +###Absolute number of alerts by final status and status decision comparison for the past 21 days + +```{r final_status_recent} + +outcomes_recent %>% + filter(!is.na(final_outcome)) %>% +ggplot(aes(x = decision_comparison, fill = final_outcome)) + + geom_bar() + + scale_final_outcome + + scale_x_discrete(drop = FALSE) + + facet_grid(.~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Nombre total d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des trois dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +###Percentage of final status by alert status decision for the past 21 days + +```{r final_status_percentage_recent} +cas <- unique(outcomes_recent$final_outcome) + +perc_final_outcome_recent <- outcomes_recent %>% + filter(!is.na(final_outcome)) %>% + mutate(confirme=final_outcome=='confirme', + non_cas=final_outcome=='non_cas', + suspect=final_outcome=='suspect') %>% + count(top_zones, decision_comparison, confirme, non_cas, suspect) %>% + mutate(confirme=case_when(!confirme~0, + confirme~as.numeric(n)), + suspect=case_when(!suspect~0, + suspect~as.numeric(n)), + non_cas=case_when(!non_cas~0, + non_cas~as.numeric(n))) %>% + mutate(total= confirme + non_cas + suspect, + perc_confirme = prop_to_perc(confirme/total), + perc_suspect = prop_to_perc(suspect/total)) %>% + pivot_longer(cols = starts_with("perc"), + names_to = "Proportion") + + +perc_final_outcome_recent %>% +ggplot(aes(x = decision_comparison, y= value, fill= Proportion)) + + geom_col(color = "white") + + scale_final_outcome_perc + + scale_x_discrete(drop = FALSE) + + facet_grid(. ~ top_zones , scales = "free_y")+ + guides(fill = guide_legend(ncol = 2)) + + labs(x = "", + y = "Pourcentage d'alertes\n", + title = "Distribution des alertes par statut final, \ndécision de validation et par zone de santé", + subtitle= "Données des 3 dernières semaines") + + theme(legend.position = "bottom") + + large_txt + + rotate_x_text(45) + +``` + + +### Table - final status overall + +```{r table_final_status, fig.keep = "all"} + +perc_final_outcome %>% + show_table() + +``` + + + +### Table - final status for the past 21 days + +```{r table_final_status_recent, fig.keep = "all"} + +perc_final_outcome_recent %>% + show_table() + +``` + + + + + +# Export data and tables {.tabset .tabset-fade .tabset-pills} + + +```{r reactivate_recent, include = FALSE} +knitr::opts_chunk$set(eval = TRUE) +``` + + +## Outline + +We export the clean data to the clean data folder and some of the relevant tables, which will be placed in the current +working directory. + + + + +## Export clean data + +We export some of the clean database, placed in `produced_rds/` as well as in +`data/clean/`: + +```{r export_rds, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +## create the text for the file name with the database date +rds_file_name <- sprintf("%sclean_%s.rds", + undated_file_name(current_mangina), + format(database_date, "%Y-%m-%d")) +rds_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_rds", rds_file_name)) + +``` + +We copy these files to the `data/clean` folder: + +```{r copy_rds, eval = TRUE} +# copy some files into `data/clean/` + +if (!dir.exists("data/clean")) { + dir.create("data/clean") +} + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + rds_file_name) +# Copy the rds data +file.copy(from = file.path("produced_rds", rds_file_name), + to = destination, + overwrite = TRUE) + +``` + + + + +## Excel files + +The following code exports all tables named in `to_report` to `xslx` files, +stored inside the folder `produced_xlsx`: + +### Cleaned alerts database + +```{r exports_tables, eval = TRUE} + +cleaned_alerts_database_mangina <- x + +to_export <- c("cleaned_alerts_database_mangina", + "table_validation_overall_time", + "table_validation_overall_past_3_weeks", + "table_hz_total_validation", + "table_hz_over_time_validation", + "table_hz_total_recent_validation", + "table_hz_over_time_recent_validation", + "table_ha_total_validation", + "table_ha_over_time_validation", + "table_ha_total_recent_validation", + "table_ha_over_time_recent_validation", + "table_origins", + "table_origins_recent", + "table_hz_total_origins", + "table_hz_over_time_origins", + "table_hz_total_recent_origins", + "table_hz_over_time_recent_origins", + "table_ha_total_origins", + "table_ha_over_time_origins", + "table_ha_total_recent_origins", + "table_ha_over_time_recent_origins", + "table_decisions", + "table_sens_spec", + "table_decisions_recent", + "table_hz_total_decisions", + "table_hz_over_time_decisions", + "table_hz_total_recent_decisions", + "table_hz_over_time_recent_decisions", + "table_ha_total_decisions", + "table_ha_over_time_decisions", + "table_ha_total_recent_decisions", + "table_ha_over_time_recent_decisions", + "perc_final_outcome", + "perc_final_outcome_recent", + "table_unknown_as") + +``` + +```{r xlsx_exports, eval = TRUE} + +## check if a directory exists and if not then creates it +if (!dir.exists("produced_xlsx")) { + dir.create("produced_xlsx") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_xlsx", + paste0(e, ".xlsx"))) +} + +``` + + +We copy the main data file to the `data/clean` folder: + +```{r export_xlsx, eval = TRUE} + +## create the text for the file name with the database date +xlsx_file_name <- sprintf("%sclean_%s.xlsx", + undated_file_name(current_mangina), + format(database_date, "%Y-%m-%d")) +xlsx_file_name + +## save the rds file in the produced_rds folder +rio::export(x, + file.path("produced_xlsx", xlsx_file_name)) + +``` + + +```{r copy_xlsx, eval = TRUE} +# copy some files into `data/clean/` + +# Provide the destination of where to copy the data +destination <- here("data", + "clean", + xlsx_file_name) +# Copy the rds data +file.copy(from = file.path("produced_xlsx", xlsx_file_name), + to = destination, + overwrite = TRUE) + +``` + + + +Click on the following links to open the files (only works if the files above +have been generated and are in the same folder as this document): + + +```{r xlsx_links, results = "asis", eval = TRUE} + + +for (e in to_export) { + txt <- sprintf("- [%s.xlsx](%s.xlsx)", + e, + file.path("produced_xlsx", + e)) + cat(txt, sep = "\n") +} + +``` + + + +## R objects + +The following code exports all tables named in `to_report` to `rds` files, +stored inside the folder `produced_rds`: + +```{r rds_exports, eval = TRUE} + +if (!dir.exists("produced_rds")) { + dir.create("produced_rds") +} + +for (e in to_export) { + rio::export(get(e), + file.path("produced_rds", + paste0(e, ".rds"))) +} + +``` + + + + + +# System information {.tabset .tabset-fade .tabset-pills} + + +## Outline + +The following information documents the system on which the document was +compiled. + + + +## System + +This provides information on the operating system. + +```{r system_info} +Sys.info() +``` + + +## R environment + +This provides information on the version of R used: + +```{r R_session} +R.version +``` + + + +## R packages + +This provides information on the packages used: + +```{r R_pkg} +sessionInfo() +``` + + +## Compilation parameters + +This shows which parameters were passed through `params` at compilation time: + +```{r params} +params +```