Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scheduling groups #11

Merged
merged 8 commits into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ Imports:
ggplot2,
airtabler,
lubridate,
rlang
rlang,
scales,
tidyr
Remotes:
bergant/airtabler,
bergant/airtabler
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Expand Down
18 changes: 17 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
# Generated by roxygen2: do not edit by hand

export(add_text_geom)
export(badged_n_days)
export(calculate_avg_time_to_checkout)
export(calculate_checkout_not_finished)
export(calculate_checkout_rate)
export(calculate_checkout_started)
export(calculate_dropout_rate)
export(calculate_nps)
export(calculate_rate)
export(calculate_reengagement_rate)
export(calculate_wksurvey_item_mean)
export(clean_select_all_responses)
export(count_trainings)
export(count_trainings_rel_yr)
export(count_trainings_yr)
export(custom_scale_x_discrete)
export(extract_year_quarter)
export(fetch_airtable)
export(fetch_redash)
export(gather_and_summarise)
export(generate_plot)
export(group_by_summarise)
export(load_libraries)
export(map_availability)
export(map_scheduling_group)
export(map_timezones)
export(percent_label)
export(preprocess_trainee_data)
export(save_plot)
export(set_time_frame)
Expand All @@ -27,17 +36,24 @@ export(trainees_n_days)
import(dplyr)
importFrom(airtabler,airtable)
importFrom(dplyr,case_when)
importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,facet_grid)
importFrom(ggplot2,geom_col)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggsave)
importFrom(lubridate,year)
importFrom(paintbrush,theme_carpentries)
importFrom(rlang,sym)
importFrom(scales,label_percent)
importFrom(stringr,str_detect)
importFrom(stringr,str_trim)
importFrom(tidyr,gather)
importFrom(tidyr,separate_rows)
importFrom(utils,read.csv)
64 changes: 63 additions & 1 deletion R/instructor_training.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ map_timezones <- function(df) {
stringr::str_detect(timezone, "\\bUTC[-]2[:]?30\\b") ~ "TZ3",
stringr::str_detect(timezone, "\\bUTC[-]3\\b") ~ "TZ3",
stringr::str_detect(timezone, "\\bUTC[-]1\\b") ~ "TZ3",
stringr::str_detect(timezone, "\\bUTC[ ]?0\\b") ~ "TZ4",
stringr::str_detect(timezone, "\\bUTC[)]\\b") ~ "TZ4",
stringr::str_detect(timezone, "\\bUTC[+]1\\b") ~ "TZ4",
stringr::str_detect(timezone, "\\bUTC[+]2\\b") ~ "TZ4",
stringr::str_detect(timezone, "\\bUTC[+]3\\b") ~ "TZ4",
Expand All @@ -139,6 +139,68 @@ map_timezones <- function(df) {
return(df)
}

#' Create Scheduling Groups
#'
#'
#' @param df Dataframe; The data frame containing timezone information. Comes from Trainer Quarterly Availability Form.
#'
#' @return Dataframe; The data frame with an additional 'sched_group'
#' column.
#' Throws an error if no matching column or multiple matching columns are found.
#'
#' @importFrom stringr str_detect
#' @importFrom dplyr rename mutate
#' @export
#'
#' @examples
#' \dontrun{
#' # Assume 'data_with_timezones' is a data frame with an appropriate timezone column
#' grouped_timezones <- map_timezones(data_with_timezones)
#' print(grouped_timezones)
#' }
#'
map_scheduling_group <- function(df) {
# The regex pattern matches both spaces and dots
matching_cols <-
grepl(
"^What time zone are you located in",
names(df),
ignore.case = TRUE
)
if (sum(matching_cols) > 1) {
stop("Error: Multiple matching columns found.")
}
if (sum(matching_cols) == 0) {
stop("No matching column name found.")
}
col_index <- which(matching_cols)
df <- df %>%
rename(timezone = names(df)[col_index])

df <- df %>%
mutate(
FD_ET = ifelse(
str_detect(timezone, "UTC[-]7|UTC[-]6|UTC[-]5|UTC[-]4|UTC[-]3|UTC[-]2[:]?30"), "FD_ET", NA), #EST +/- 2, also includes Newfoundland timezone
FD_PT = ifelse(
str_detect(timezone, "UTC[-]10|UTC[-]9|UTC[-]8|UTC[-]7|UTC[-]6"), "FD_PT", NA), # PST +/- 2
FD_CET = ifelse(
str_detect(timezone, "UTC[-]1|UTC[)]|UTC[+]1|UTC[+]2"), "FD_CET", NA), # CEST +/- 2
FD_AET = ifelse(
str_detect(timezone, "UTC[+]12|UTC[+]11|UTC[+]10|UTC[+]9|UTC[+]8"), "FD_AET", NA), # AEST +/- 2
HD_PM_PT = ifelse(
str_detect(timezone, "UTC[+]10|UTC[+]11|UTC[+]12|UTC[-]12|UTC[-]11|UTC[-]10|UTC[-]9|UTC[-]8|UTC[-]7|UTC[-]6"), "HD_PM_PT", NA), # PST + 6 hours before and 2 hours after
HD_PM_ET = ifelse(
str_detect(timezone, "UTC[-]11|UTC[-]10|UTC[-]9|UTC[-]8|UTC[-]7|UTC[-]6|UTC[-]5|UTC[-]4|UTC[-]3"), "HD_PM_ET", NA),
HD_PM_CET = ifelse(
str_detect(timezone, "UTC[-]5|UTC[-]4|UTC[-]3|UTC[-]2[:]?30|UTC[-]2|UTC[-]1|UTC[ ]?0|UTC[+]1|UTC[+]2|UTC[+]3"), "HD_PM_CET", NA),
HD_PM_AET = ifelse(
str_detect(timezone, "UTC[+]4|UTC[+]5|UTC[+]6|UTC[+]7|UTC[+]8|UTC[+]9|UTC[+]10|UTC[+]11|UTC[+]12"), "HD_PM_AET", NA)
)


return(df)
}

#' Extract Quarter and Year Information from Data Frame Name
#'
#' This function extracts the quarter and year information from a given data frame name.
Expand Down
97 changes: 94 additions & 3 deletions R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#' @return A ggplot object representing the generated plot.
#' @export
#' @importFrom ggplot2 ggplot aes_string geom_col geom_text facet_grid
#' @importFrom paintbrush theme_carpentries
#
#'
#' @examples
Expand All @@ -29,8 +28,7 @@ generate_plot <- function(dat, x, y, fill = NULL) {
plot <- ggplot(dat, aes_string(x = x, y = y, fill = fill)) +
geom_col(position = position_dodge(width = 0.9)) +
geom_text(aes(label = after_stat(y)), stat = "identity", vjust = -0.3, position = position_dodge(width = 0.9)) +
facet_grid(year ~ quarter) +
theme_carpentries()
facet_grid(year ~ quarter)
return(plot)
}

Expand Down Expand Up @@ -59,3 +57,96 @@ save_plot <- function(dir, plot) {
print(path)
ggsave(path, plot = plot, device = 'png', dpi = 300, width = 4, height = 3)
}

#' Add Data Labels to a ggplot2 Plot
#'
#' This function adds text labels to a ggplot2 plot. It supports two types of labels: counts and percentages.
#' The labels can be customized in terms of their accuracy, color, vertical adjustment, and font face.
#'
#' @param label_type A character string specifying the type of label to add.
#' Acceptable values are "count" for count labels and "percent" for percentage labels.
#' @param accuracy An optional numeric value specifying the number of decimal places for percentage labels.
#' Only applicable if `label_type` is "percent". Default is 0.1.
#' @param color A character string specifying the color of the text labels. Default is "white".
#' @param vjust A numeric value for vertical adjustment of the text labels.
#' Positive values move text up, and negative values move it down. Default is 1.6.
#' @param fontface A character string specifying the font face for text labels.
#' Common values are "plain", "bold", "italic", "bold.italic". Default is "bold".
#' @param nudge_x A numeric value specifying horizontal adjustment of the text labels.
#' Positive values move text to the right, and negative values move it to the left. Default is 0.
#'
#' @importFrom ggplot2 geom_text
#' @importFrom scales label_percent
#'
#' @return A ggplot2 layer with the specified text labels added.
#' @export
#'
#' @examples


add_text_geom <- function(label_type = "count", accuracy = 0.1, color = "white", vjust = 1.6, fontface = "bold", nudge_x = 0) {
if (label_type == "count") {
plot + geom_text(aes(label = count), color = color, vjust = vjust, nudge_x = nudge_x, fontface = fontface)
} else if (label_type == "percent") {
geom_text(aes(label = scales::label_percent(accuracy = accuracy)(percent)), color = color, vjust = vjust, fontface = fontface)
}
}

#' Format and Text Wrap X Axis Labels in ggplot2
#'
#' This function applies text wrapping to x-axis labels in a ggplot2 plot.
#' It allows for customizing the width of the wrap, making long labels more readable by breaking them into multiple lines.
#'
#' @param labels A vector of labels to be used for the x-axis.
#' These labels should correspond to the categories or values present on the x-axis.
#' @param wrap_width An integer specifying the maximum width (in characters) before wrapping a label onto the next line.
#' The default value is 15.
#'
#' @return A ggplot2 scale layer, specifically a modified `scale_x_discrete`,
#' with the labels formatted to wrap text based on the specified `wrap_width`.
#' @export
#'
#' @examples
#' # Example using the diamonds dataset from ggplot2
#' library(ggplot2)
#' data(diamonds)
#'
#' # Create artificial long labels for illustration
#' long_labels <- c("Fair Quality Cut", "Good Quality Cut", "Very Good Quality Cut",
#' "Premium Quality Cut", "Ideal Quality Cut")
#' names(long_labels) <- levels(diamonds$cut)
#'
#' basic_plot <- ggplot(diamonds, aes(x = cut, y = price)) + geom_bar(stat = "identity")
#' # Apply custom scale with wrapped labels
#' basic_plot + custom_scale_x_discrete(labels = long_labels, wrap_width = 10)
custom_scale_x_discrete <- function(labels, wrap_width = 15) {
scale_x_discrete(
drop = FALSE,
labels = function(x) lapply(
strwrap(labels, width = wrap_width, simplify = FALSE),
paste,
collapse = "\n"
)
)
}

#' Add Percent Sign to Data Label
#'
#' This function formats numeric values as percentages. It rounds the numbers to the specified decimal places and appends a percent sign.
#'
#' @param x Numeric vector; the values to be formatted as percentages.
#' @param decimal Integer; the number of decimal places to round the numeric values.
#' Defaults to 1, indicating one decimal place.
#'
#' @return A character vector with the numeric values formatted as percentages.
#' @export
#'
#' @examples
#' # Example usage of percent_label
#' numeric_values <- c(0.123, 0.456, 0.789)
#' percent_labels <- percent_label(numeric_values)
#' print(percent_labels)
#' # Output: "12.3%", "45.6%", "78.9%"
percent_label <- function(x, decimal = 1) {
paste0(round(x, decimal), "%")
}
104 changes: 104 additions & 0 deletions R/surveys.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@


#' Calculate Net Promoter Score (NPS)
#'
#' This function calculates the Net Promoter Score from survey data based on respondents' ratings.
#' NPS is a measure of customer loyalty and is calculated using the formula:
#' Percentage of Promoters - Percentage of Detractors.
#' For more information on NPS, visit: https://monkeylearn.com/blog/nps-analysis/
#'
#' @param dat A data frame containing survey data.
#' @param col A character string specifying the name of the column containing NPS ratings (expected to be numeric).
#'
#' @return A data frame summarizing the number and percentage of respondents in each category:
#' Promoters, Passives, and Detractors.
#' @export
#'
#' @examples
#' # Example usage with a sample data frame
#' sample_data <- data.frame(rating = sample(0:10, 100, replace = TRUE))
#' nps_result <- calculate_nps(sample_data, "rating")
#' print(nps_result)
calculate_nps <- function(dat, col) {
# Ensure col is a character vector of length 1
if (!is.character(col) || length(col) != 1) {
stop("Error: 'col' should be a single column name as character.")
}

# Check in the column exists in the data frame
if (!col %in% names(dat)) {
stop("Error: Specified column does not exist in the data frame.")
}

# Check if hte dat in the column is numeric
if (!is.numeric(dat[[col]])) {
stop("Error: Data in the specified column is not numeric.")
}
# Calculate net promoter score
table <- dat %>%
mutate(Category = case_when(
.[[col]] <= 6 ~ 'Detractor',
.[[col]] %in% 7:8 ~ 'Passive',
.[[col]] >= 9 ~ 'Promoter',
TRUE ~ NA_character_
)) %>%
filter(!is.na(Category)) %>%
group_by(Category) %>%
summarise(Number = n(), .groups = 'drop') %>%
mutate(Percent = (Number / sum(Number)))

}

#' Gather Questions into One Table for Graphing
#'
#' This function transforms survey data by converting selected question columns into a long format
#' suitable for comparison and graphing. It facilitates analysis across multiple questions.
#'
#' @param dat A data frame containing survey data.
#' @param questions A character vector of column names in 'dat' representing the questions to be summarized.
#' @param levels A vector of the levels (possible responses) for the questions.
#' @param labels A vector of labels corresponding to the 'levels' vector for re-labeling responses.
#'
#' @importFrom dplyr select mutate group_by summarize
#' @importFrom tidyr gather
#'
#' @return A data frame in long format, with each row representing a response to a question,
#' along with the count of each response type.
#' @export
#'
#' @examples
#' # Example usage with a sample data frame
#' sample_data <- data.frame(Q1 = sample(1:5, 100, replace = TRUE), Q2 = sample(1:5, 100, replace = TRUE))
#' questions <- c("Q1", "Q2")
#' levels <- 1:5
#' labels <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
#' summarised_data <- gather_and_summarise(sample_data, questions, levels, labels)
#' print(summarised_data)
gather_and_summarise <- function(dat, questions, levels, labels){
# Validation Checks
if (!is.data.frame(dat)) {
stop("Error: 'dat' should be a data frame.")
}

if (!all(questions %in% names(dat))) {
stop("Error: Some questions in 'questions' are not column names in 'dat'.")
}

if (length(levels) != length(labels)) {
stop("Error: The length of 'levels' and 'labels' must be the same.")
}

# Convert the selected columns into a long format
tryCatch({
table <- dat %>%
select(all_of(questions)) %>%
gather(key = "Question", value = "Answer") %>%
mutate(Answer = factor(Answer, levels = levels, labels = labels)) %>%
group_by(Question, Answer) %>%
summarize(Number = n())

return(table)
}, error = function(e) {
stop("Error in processing data: ", e$message)
})
}
Loading
Loading