carpentries · klbarnes20 · Nov 23, 2023 · Nov 8, 2023 · Nov 10, 2023 · Nov 10, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,9 +12,11 @@ Imports:
   ggplot2,
   airtabler,
   lubridate, 
-  rlang
+  rlang,
+  scales,
+  tidyr
 Remotes:
-  bergant/airtabler,
+  bergant/airtabler
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,23 +1,32 @@
 # Generated by roxygen2: do not edit by hand
 
+export(add_text_geom)
 export(badged_n_days)
 export(calculate_avg_time_to_checkout)
 export(calculate_checkout_not_finished)
 export(calculate_checkout_rate)
 export(calculate_checkout_started)
 export(calculate_dropout_rate)
+export(calculate_nps)
 export(calculate_rate)
 export(calculate_reengagement_rate)
 export(calculate_wksurvey_item_mean)
+export(clean_select_all_responses)
 export(count_trainings)
 export(count_trainings_rel_yr)
 export(count_trainings_yr)
+export(custom_scale_x_discrete)
 export(extract_year_quarter)
 export(fetch_airtable)
 export(fetch_redash)
+export(gather_and_summarise)
 export(generate_plot)
+export(group_by_summarise)
+export(load_libraries)
 export(map_availability)
+export(map_scheduling_group)
 export(map_timezones)
+export(percent_label)
 export(preprocess_trainee_data)
 export(save_plot)
 export(set_time_frame)
@@ -27,17 +36,24 @@ export(trainees_n_days)
 import(dplyr)
 importFrom(airtabler,airtable)
 importFrom(dplyr,case_when)
+importFrom(dplyr,count)
 importFrom(dplyr,filter)
+importFrom(dplyr,group_by)
 importFrom(dplyr,mutate)
 importFrom(dplyr,rename)
+importFrom(dplyr,select)
+importFrom(dplyr,summarize)
 importFrom(ggplot2,aes_string)
 importFrom(ggplot2,facet_grid)
 importFrom(ggplot2,geom_col)
 importFrom(ggplot2,geom_text)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,ggsave)
 importFrom(lubridate,year)
-importFrom(paintbrush,theme_carpentries)
 importFrom(rlang,sym)
+importFrom(scales,label_percent)
 importFrom(stringr,str_detect)
+importFrom(stringr,str_trim)
+importFrom(tidyr,gather)
+importFrom(tidyr,separate_rows)
 importFrom(utils,read.csv)
diff --git a/R/instructor_training.R b/R/instructor_training.R
@@ -119,7 +119,7 @@ map_timezones <- function(df) {
       stringr::str_detect(timezone, "\\bUTC[-]2[:]?30\\b") ~ "TZ3",
       stringr::str_detect(timezone, "\\bUTC[-]3\\b") ~ "TZ3",
       stringr::str_detect(timezone, "\\bUTC[-]1\\b") ~ "TZ3",
-      stringr::str_detect(timezone, "\\bUTC[ ]?0\\b") ~ "TZ4",
+      stringr::str_detect(timezone, "\\bUTC[)]\\b") ~ "TZ4",
       stringr::str_detect(timezone, "\\bUTC[+]1\\b") ~ "TZ4",
       stringr::str_detect(timezone, "\\bUTC[+]2\\b") ~ "TZ4",
       stringr::str_detect(timezone, "\\bUTC[+]3\\b") ~ "TZ4",
@@ -139,6 +139,68 @@ map_timezones <- function(df) {
   return(df)
 }
 
+#' Create Scheduling Groups
+#'
+#'
+#' @param df Dataframe; The data frame containing timezone information. Comes from Trainer Quarterly Availability Form.
+#'
+#' @return Dataframe; The data frame with an additional 'sched_group'
+#' column.
+#' Throws an error if no matching column or multiple matching columns are found.
+#'
+#' @importFrom stringr str_detect
+#' @importFrom dplyr rename mutate
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'   # Assume 'data_with_timezones' is a data frame with an appropriate timezone column
+#'   grouped_timezones <- map_timezones(data_with_timezones)
+#'   print(grouped_timezones)
+#' }
+#'
+map_scheduling_group <- function(df) {
+  # The regex pattern matches both spaces and dots
+  matching_cols <-
+    grepl(
+      "^What time zone are you located in",
+      names(df),
+      ignore.case = TRUE
+    )
+  if (sum(matching_cols) > 1) {
+    stop("Error: Multiple matching columns found.")
+  }
+  if (sum(matching_cols) == 0) {
+    stop("No matching column name found.")
+  }
+  col_index <- which(matching_cols)
+  df <- df %>%
+    rename(timezone = names(df)[col_index])
+
+  df <- df %>%
+    mutate(
+      FD_ET = ifelse(
+        str_detect(timezone, "UTC[-]7|UTC[-]6|UTC[-]5|UTC[-]4|UTC[-]3|UTC[-]2[:]?30"), "FD_ET", NA), #EST +/- 2, also includes Newfoundland timezone
+      FD_PT = ifelse(
+        str_detect(timezone, "UTC[-]10|UTC[-]9|UTC[-]8|UTC[-]7|UTC[-]6"), "FD_PT", NA), # PST +/- 2
+      FD_CET = ifelse(
+        str_detect(timezone, "UTC[-]1|UTC[)]|UTC[+]1|UTC[+]2"), "FD_CET", NA), # CEST +/- 2
+      FD_AET = ifelse(
+        str_detect(timezone, "UTC[+]12|UTC[+]11|UTC[+]10|UTC[+]9|UTC[+]8"), "FD_AET", NA), # AEST +/- 2
+      HD_PM_PT = ifelse(
+        str_detect(timezone, "UTC[+]10|UTC[+]11|UTC[+]12|UTC[-]12|UTC[-]11|UTC[-]10|UTC[-]9|UTC[-]8|UTC[-]7|UTC[-]6"), "HD_PM_PT", NA), # PST + 6 hours before and 2 hours after
+      HD_PM_ET = ifelse(
+        str_detect(timezone, "UTC[-]11|UTC[-]10|UTC[-]9|UTC[-]8|UTC[-]7|UTC[-]6|UTC[-]5|UTC[-]4|UTC[-]3"), "HD_PM_ET", NA),
+      HD_PM_CET = ifelse(
+        str_detect(timezone, "UTC[-]5|UTC[-]4|UTC[-]3|UTC[-]2[:]?30|UTC[-]2|UTC[-]1|UTC[ ]?0|UTC[+]1|UTC[+]2|UTC[+]3"), "HD_PM_CET", NA),
+      HD_PM_AET = ifelse(
+        str_detect(timezone, "UTC[+]4|UTC[+]5|UTC[+]6|UTC[+]7|UTC[+]8|UTC[+]9|UTC[+]10|UTC[+]11|UTC[+]12"), "HD_PM_AET", NA)
+    )
+
+
+  return(df)
+}
+
 #' Extract Quarter and Year Information from Data Frame Name
 #'
 #' This function extracts the quarter and year information from a given data frame name.

diff --git a/R/plotting.R b/R/plotting.R
@@ -13,7 +13,6 @@
 #' @return A ggplot object representing the generated plot.
 #' @export
 #' @importFrom ggplot2 ggplot aes_string geom_col geom_text facet_grid
-#' @importFrom paintbrush theme_carpentries
 #
 #'
 #' @examples
@@ -29,8 +28,7 @@ generate_plot <- function(dat, x, y, fill = NULL) {
   plot <- ggplot(dat, aes_string(x = x, y = y, fill = fill)) +
     geom_col(position = position_dodge(width = 0.9)) +
     geom_text(aes(label = after_stat(y)), stat = "identity", vjust = -0.3, position = position_dodge(width = 0.9)) +
-    facet_grid(year ~ quarter) +
-    theme_carpentries()
+    facet_grid(year ~ quarter)
   return(plot)
 }
 
@@ -59,3 +57,96 @@ save_plot <- function(dir, plot) {
   print(path)
   ggsave(path, plot = plot, device = 'png', dpi = 300, width = 4, height = 3)
 }
+
+#' Add Data Labels to a ggplot2 Plot
+#'
+#' This function adds text labels to a ggplot2 plot. It supports two types of labels: counts and percentages.
+#' The labels can be customized in terms of their accuracy, color, vertical adjustment, and font face.
+#'
+#' @param label_type A character string specifying the type of label to add.
+#'                   Acceptable values are "count" for count labels and "percent" for percentage labels.
+#' @param accuracy An optional numeric value specifying the number of decimal places for percentage labels.
+#'                 Only applicable if `label_type` is "percent". Default is 0.1.
+#' @param color A character string specifying the color of the text labels. Default is "white".
+#' @param vjust A numeric value for vertical adjustment of the text labels.
+#'              Positive values move text up, and negative values move it down. Default is 1.6.
+#' @param fontface A character string specifying the font face for text labels.
+#'                 Common values are "plain", "bold", "italic", "bold.italic". Default is "bold".
+#' @param nudge_x A numeric value specifying horizontal adjustment of the text labels.
+#'                Positive values move text to the right, and negative values move it to the left. Default is 0.
+#'
+#' @importFrom ggplot2 geom_text
+#' @importFrom scales label_percent
+#'
+#' @return A ggplot2 layer with the specified text labels added.
+#' @export
+#'
+#' @examples
+
+
+add_text_geom <- function(label_type = "count", accuracy = 0.1, color = "white", vjust = 1.6, fontface = "bold", nudge_x = 0) {
+  if (label_type == "count") {
+    plot + geom_text(aes(label = count), color = color, vjust = vjust, nudge_x = nudge_x, fontface = fontface)
+  } else if (label_type == "percent") {
+    geom_text(aes(label = scales::label_percent(accuracy = accuracy)(percent)), color = color, vjust = vjust, fontface = fontface)
+  }
+}
+
+#' Format and Text Wrap X Axis Labels in ggplot2
+#'
+#' This function applies text wrapping to x-axis labels in a ggplot2 plot.
+#' It allows for customizing the width of the wrap, making long labels more readable by breaking them into multiple lines.
+#'
+#' @param labels A vector of labels to be used for the x-axis.
+#'               These labels should correspond to the categories or values present on the x-axis.
+#' @param wrap_width An integer specifying the maximum width (in characters) before wrapping a label onto the next line.
+#'                   The default value is 15.
+#'
+#' @return A ggplot2 scale layer, specifically a modified `scale_x_discrete`,
+#'         with the labels formatted to wrap text based on the specified `wrap_width`.
+#' @export
+#'
+#' @examples
+#' # Example using the diamonds dataset from ggplot2
+#' library(ggplot2)
+#' data(diamonds)
+#'
+#' # Create artificial long labels for illustration
+#' long_labels <- c("Fair Quality Cut", "Good Quality Cut", "Very Good Quality Cut",
+#'                  "Premium Quality Cut", "Ideal Quality Cut")
+#' names(long_labels) <- levels(diamonds$cut)
+#'
+#' basic_plot <- ggplot(diamonds, aes(x = cut, y = price)) + geom_bar(stat = "identity")
+#' # Apply custom scale with wrapped labels
+#' basic_plot + custom_scale_x_discrete(labels = long_labels, wrap_width = 10)
+custom_scale_x_discrete <- function(labels, wrap_width = 15) {
+  scale_x_discrete(
+    drop = FALSE,
+    labels = function(x) lapply(
+      strwrap(labels, width = wrap_width, simplify = FALSE),
+      paste,
+      collapse = "\n"
+    )
+  )
+}
+
+#' Add Percent Sign to Data Label
+#'
+#' This function formats numeric values as percentages. It rounds the numbers to the specified decimal places and appends a percent sign.
+#'
+#' @param x Numeric vector; the values to be formatted as percentages.
+#' @param decimal Integer; the number of decimal places to round the numeric values.
+#'               Defaults to 1, indicating one decimal place.
+#'
+#' @return A character vector with the numeric values formatted as percentages.
+#' @export
+#'
+#' @examples
+#' # Example usage of percent_label
+#' numeric_values <- c(0.123, 0.456, 0.789)
+#' percent_labels <- percent_label(numeric_values)
+#' print(percent_labels)
+#' # Output: "12.3%", "45.6%", "78.9%"
+percent_label <- function(x, decimal = 1) {
+  paste0(round(x, decimal), "%")
+}
diff --git a/R/surveys.R b/R/surveys.R
@@ -0,0 +1,104 @@
+
+
+#' Calculate Net Promoter Score (NPS)
+#'
+#' This function calculates the Net Promoter Score from survey data based on respondents' ratings.
+#' NPS is a measure of customer loyalty and is calculated using the formula:
+#' Percentage of Promoters - Percentage of Detractors.
+#' For more information on NPS, visit: https://monkeylearn.com/blog/nps-analysis/
+#'
+#' @param dat A data frame containing survey data.
+#' @param col A character string specifying the name of the column containing NPS ratings (expected to be numeric).
+#'
+#' @return A data frame summarizing the number and percentage of respondents in each category:
+#'         Promoters, Passives, and Detractors.
+#' @export
+#'
+#' @examples
+#' # Example usage with a sample data frame
+#' sample_data <- data.frame(rating = sample(0:10, 100, replace = TRUE))
+#' nps_result <- calculate_nps(sample_data, "rating")
+#' print(nps_result)
+calculate_nps <- function(dat, col) {
+  # Ensure col is a character vector of length 1
+  if (!is.character(col) || length(col) != 1) {
+    stop("Error: 'col' should be a single column name as character.")
+  }
+
+  # Check in the column exists in the data frame
+  if (!col %in% names(dat)) {
+    stop("Error: Specified column does not exist in the data frame.")
+  }
+
+  # Check if hte dat in the column is numeric
+  if (!is.numeric(dat[[col]])) {
+    stop("Error: Data in the specified column is not numeric.")
+  }
+  # Calculate net promoter score
+  table <- dat %>%
+    mutate(Category = case_when(
+      .[[col]] <= 6 ~ 'Detractor',
+      .[[col]] %in% 7:8 ~ 'Passive',
+      .[[col]] >= 9 ~ 'Promoter',
+      TRUE ~ NA_character_
+    )) %>%
+    filter(!is.na(Category)) %>%
+    group_by(Category) %>%
+    summarise(Number = n(), .groups = 'drop') %>%
+    mutate(Percent = (Number / sum(Number)))
+
+}
+
+#' Gather Questions into One Table for Graphing
+#'
+#' This function transforms survey data by converting selected question columns into a long format
+#' suitable for comparison and graphing. It facilitates analysis across multiple questions.
+#'
+#' @param dat A data frame containing survey data.
+#' @param questions A character vector of column names in 'dat' representing the questions to be summarized.
+#' @param levels A vector of the levels (possible responses) for the questions.
+#' @param labels A vector of labels corresponding to the 'levels' vector for re-labeling responses.
+#'
+#' @importFrom dplyr select mutate group_by summarize
+#' @importFrom tidyr gather
+#'
+#' @return A data frame in long format, with each row representing a response to a question,
+#'         along with the count of each response type.
+#' @export
+#'
+#' @examples
+#' # Example usage with a sample data frame
+#' sample_data <- data.frame(Q1 = sample(1:5, 100, replace = TRUE), Q2 = sample(1:5, 100, replace = TRUE))
+#' questions <- c("Q1", "Q2")
+#' levels <- 1:5
+#' labels <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
+#' summarised_data <- gather_and_summarise(sample_data, questions, levels, labels)
+#' print(summarised_data)
+gather_and_summarise <- function(dat, questions, levels, labels){
+  # Validation Checks
+  if (!is.data.frame(dat)) {
+    stop("Error: 'dat' should be a data frame.")
+  }
+
+  if (!all(questions %in% names(dat))) {
+    stop("Error: Some questions in 'questions' are not column names in 'dat'.")
+  }
+
+  if (length(levels) != length(labels)) {
+    stop("Error: The length of 'levels' and 'labels' must be the same.")
+  }
+
+  # Convert the selected columns into a long format
+  tryCatch({
+    table <- dat %>%
+      select(all_of(questions)) %>%
+      gather(key = "Question", value = "Answer") %>%
+      mutate(Answer = factor(Answer, levels = levels, labels = labels)) %>%
+      group_by(Question, Answer) %>%
+      summarize(Number = n())
+
+    return(table)
+  }, error = function(e) {
+    stop("Error in processing data: ", e$message)
+  })
+}