CTG_report_large_screen.Rmd

---
params:
    data_file: "~/Downloads/CTG_Calico_Low/CAL_ctg_viability_data.Rds"
title: "`r basename(dirname(params$data_file))`"
author: "Andrew Boghossian"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: html_document
---

```{r setup, include=FALSE}
# Report-wide chunk defaults: code, results, warnings, and messages are all
# switched off, so a chunk only shows up in the rendered report when its own
# header overrides `include`.
knitr::opts_chunk$set(
  echo = FALSE,
  include = FALSE,
  warning = FALSE,
  error = FALSE,
  message = FALSE
)
library(plyr)
library(tidyverse)
library(ggthemes)
library(magrittr)
library(future.apply)
library(psych)
library(ComplexHeatmap)
library(viridis)
library(circlize)
library(ggridges)
# Default ggplot2 theme applied to every figure in the report.
theme_set(theme_pander(base_size = 12))

# Load project helper scripts quietly. `chdir = TRUE` makes `source()`
# temporarily switch to the script's own directory while it is evaluated.
suppressMessages(source("src/helperFunctions.R"))
suppressMessages(source("src/make_dose_curves.R", chdir = TRUE))

# Project label used in the report text: the name of the folder holding the
# data file, with its first "_"-delimited token dropped.
project <- word(basename(dirname(params$data_file)), 2, -1, sep = fixed("_"))
```

```{r load data}
# Read the viability table named by the `data_file` report parameter.
normalized_data <- read_rds(params$data_file)

# If no plate map name is available for any row, fall back to the
# `Source Barcode` column (when it exists) so that the per-plate sections
# still have a label to group on.
# `&&` is used because `if` needs a single scalar condition; it also skips the
# column lookup when the first test already fails.
if (all(is.na(normalized_data$plate_map_name)) &&
    "Source Barcode" %in% colnames(normalized_data)) {
  normalized_data %<>% dplyr::mutate(plate_map_name = `Source Barcode`)
}
```

# Introduction

This report presents quality-control (QC) results for the `r project` CTG plates.

# Luminescence and viability distributions {.tabset .tabset-pills}

## Luminescence distributions {.tabset .tabset-fade}

```{r luminescence, results = 'asis', include = T, fig.height = 20, fig.width=12}
# One tab per plate map: ridge plots of luminescence for each cell line,
# colored by perturbation type and faceted by replicate.
for (cur_plate in unique(normalized_data$plate_map_name)) {
  # Tab heading, written as raw markdown (the chunk uses results = 'asis').
  # The format has a single %s, so only the plate name is passed.
  cat(sprintf("\n\n### %s {.tabset .tabset-fade}\n\n", cur_plate))

  # Shift luminescence by 1 so zero readings survive the log10 x-axis.
  df <- normalized_data %>%
    dplyr::filter(plate_map_name == cur_plate) %>%
    dplyr::mutate(lum = lum + 1)

  # Earlier density/histogram version, kept disabled for reference:
  # p <- df %>%
  #   ggplot(aes(x = lum, color = pert_type)) +
  #   geom_density() +
  #   geom_histogram(aes( y = ..density.., fill = pert_type), alpha = .3, position = 'identity') +
  #   scale_x_log10() +
  #   labs(x = "Luminescence", y = "Density", color = "Type") +
  #   ggtitle('Distribution of luminescence, separated by treatment type') +
  #   scale_color_tableau(guide = "none") + scale_fill_tableau(name = "pert_type") +
  #   facet_grid(ccle_name ~ replicate, scales = "free_y")
  #
  # print(p)

  p <- df %>%
    ggplot(aes(y = ccle_name, x = lum, color = pert_type, fill = pert_type)) +
    geom_density_ridges(alpha = .3, scale = 1) +
    scale_x_log10() +
    labs(x = "Luminescence", y = "Cell Line", color = "Type", fill = "Type") +
    ggtitle("Distribution of luminescence, separated by treatment type") +
    # Only the fill legend is shown; the color legend would duplicate it.
    scale_color_tableau(guide = "none") +
    scale_fill_tableau(name = "Type") +
    facet_grid(. ~ replicate, scales = "free_y")

  # Plots must be printed explicitly inside a for loop.
  print(p)
  cat("\n\n")
}
```

## Viability distributions (poscon normalized fold-change) {.tabset .tabset-fade}

```{r, results= 'asis', include = T, fig.height = 20, fig.width=12}
# One tab per plate map: ridge plots of the `viability` column for each cell
# line, colored by perturbation type and faceted by replicate.
for (cur_plate in unique(normalized_data$plate_map_name)) {
  # Tab heading, written as raw markdown (the chunk uses results = 'asis').
  # The format has a single %s, so only the plate name is passed.
  cat(sprintf("\n\n### %s {.tabset .tabset-fade}\n\n", cur_plate))

  # Keep only finite, strictly positive values: the x-axis is log10-scaled.
  df <- normalized_data %>%
    dplyr::filter(
      plate_map_name == cur_plate,
      is.finite(viability),
      viability > 0
    )

  p <- df %>%
    ggplot(aes(y = ccle_name, x = viability, color = pert_type, fill = pert_type)) +
    geom_density_ridges(alpha = .3, scale = 1) +
    scale_x_log10() +
    labs(x = "Viability (poscon normalized)", y = "Cell Line", color = "Type", fill = "Type") +
    ggtitle("Distribution of viability, separated by treatment type") +
    # Only the fill legend is shown; the color legend would duplicate it.
    scale_color_tableau(guide = "none") +
    scale_fill_tableau(name = "Type") +
    facet_grid(. ~ replicate, scales = "free_y")

  # Plots must be printed explicitly inside a for loop.
  print(p)
  cat("\n\n")
}
```

## Viability distributions (fold-change) {.tabset .tabset-fade}

```{r, results = 'asis', include = T, fig.height = 20, fig.width=12}
# One tab per plate map: ridge plots of fold-change viability (2^log_fc) for
# each cell line, colored by perturbation type and faceted by replicate.
for (cur_plate in unique(normalized_data$plate_map_name)) {
  # Tab heading, written as raw markdown (the chunk uses results = 'asis').
  # The format has a single %s, so only the plate name is passed.
  cat(sprintf("\n\n### %s {.tabset .tabset-fade}\n\n", cur_plate))

  # Drop rows whose log fold-change is NA/NaN/Inf before plotting.
  df <- normalized_data %>%
    dplyr::filter(plate_map_name == cur_plate, is.finite(log_fc))

  p <- df %>%
    # log_fc is back-transformed to a fold-change and shown on a log10 axis.
    ggplot(aes(y = ccle_name, x = 2^log_fc, color = pert_type, fill = pert_type)) +
    geom_density_ridges(alpha = .3, scale = 1) +
    labs(x = "Viability", y = "Cell Line", color = "Type", fill = "Type") +
    scale_x_log10() +
    ggtitle("Distribution of viability, separated by treatment type") +
    # Only the fill legend is shown; the color legend would duplicate it.
    scale_color_tableau(guide = "none") +
    scale_fill_tableau(name = "Type") +
    facet_grid(. ~ replicate, scales = "free_y")

  # Plots must be printed explicitly inside a for loop.
  print(p)
  cat("\n\n")
}
```

# SSMDs

```{r calc ssmd}
# SSMD between positive-control and negative-control log fold-changes, one
# value per assay plate barcode / cell line / plate map / replicate.
# NOTE(review): `calculate_ssmd_robust()` is not defined in this file; it is
# presumably provided by one of the sourced helper scripts.
ssmds <- normalized_data %>%
  dplyr::group_by(arp_barcode, ccle_name, plate_map_name, replicate) %>%
  dplyr::summarise(
    ssmd = calculate_ssmd_robust(
      log_fc[pert_type == "poscon"],
      log_fc[pert_type == "negcon"]
    ),
    # Return an ungrouped table directly instead of a separate ungroup() step.
    .groups = "drop"
  )
```

```{r ssmd plot, include = T, fig.width=12, fig.height=8}
# Interactive table listing every SSMD value.
DT::datatable(ssmds)

# Histogram of SSMD values. Bars are filled per cell line / plate map /
# replicate combination (cell line shortened to the text before its first
# "_"); the legend is hidden.
ssmd_hist <- ggplot(
  ssmds,
  aes(
    x = ssmd,
    fill = paste(word(ccle_name, 1, sep = fixed("_")), plate_map_name, replicate)
  )
)
ssmd_hist <- ssmd_hist +
  geom_histogram() +
  # Dashed reference line at SSMD = -2.
  geom_vline(aes(xintercept = -2), linetype = "dashed")
ssmd_hist <- ssmd_hist +
  scale_y_continuous(limits = c(0, NA)) +
  labs(x = "SSMD", y = "Count", fill = "Cell Line Replicate") +
  theme(legend.position = "none")

ssmd_hist
```

# Replicate correlations {.tabset .tabset-pills}

## Viability (poscon normalized)
```{r, include = T, fig.height=12, fig.width=12}
# Pairwise replicate comparison of `viability`.
# Rows are restricted to finite viabilities, and treatment rows must have a
# dose. The table is then spread so that each plate map / replicate pair is a
# column holding the median viability per ccle_name / pert_type / dose /
# broad_id combination.
normalized_data %>%
  dplyr::filter(
    pert_type != "treatment" | !is.na(dose),
    is.finite(viability)
  ) %>%
  tidyr::pivot_wider(
    id_cols = c("ccle_name", "pert_type", "dose", "broad_id"),
    names_from = c("plate_map_name", "replicate"),
    values_from = "viability",
    values_fn = median
  ) %>%
  # Drop the identifier columns so only the replicate columns are compared.
  dplyr::select(-ccle_name, -dose, -pert_type, -broad_id) %>%
  psych::pairs.panels(
    ellipses = FALSE,
    lm = TRUE,
    hist.col = tableau_color_pal()(1)
  )

```

## Viability (log fold-change)
```{r, include = T, fig.height=12, fig.width=12}
# Pairwise replicate comparison of `log_fc`.
# Only rows with a dose and a finite log fold-change are kept. The table is
# then spread so that each plate map / replicate pair is a column holding the
# median log_fc per ccle_name / pert_type / dose / broad_id combination.
normalized_data %>%
  dplyr::filter(!is.na(dose), is.finite(log_fc)) %>%
  tidyr::pivot_wider(
    id_cols = c("ccle_name", "pert_type", "dose", "broad_id"),
    names_from = c("plate_map_name", "replicate"),
    values_from = "log_fc",
    values_fn = median
  ) %>%
  # Drop the identifier columns so only the replicate columns are compared.
  dplyr::select(-ccle_name, -dose, -pert_type, -broad_id) %>%
  psych::pairs.panels(
    ellipses = FALSE,
    lm = TRUE,
    hist.col = tableau_color_pal()(1)
  )

```

# Does viability decrease with dose?

## Spearman correlation

We can use the Spearman correlation between dose and viability to measure how monotonic the dose-viability relationship is.

Negative correlations indicate that viability decreases as dose increases, i.e. dose-response behavior.

```{r correlations, include = TRUE, fig.height=10, fig.width=24}
# Spearman correlation between viability and dose for every
# plate barcode / broad_id / cell line / plate map / replicate group that has
# more than three distinct dose values. Any error during the computation is
# deliberately absorbed and replaced by NA so the report can still render.
spearman_correlations <- tryCatch(
  expr = {
    normalized_data %>%
      dplyr::group_by(arp_barcode, broad_id, ccle_name, plate_map_name, replicate) %>%
      dplyr::filter(length(unique(dose)) > 3) %>%
      dplyr::summarise(
        spearman = cor(
          viability, dose,
          method = "spearman",
          use = "pairwise.complete.obs"
        ),
        .groups = "drop"
      )
  },
  error = function(e) NA
)

# `if` needs a single TRUE/FALSE. Calling is.na() on a data frame yields a
# whole logical matrix, so test the result's type and row count instead.
# An empty table (no group had enough doses) is treated like a failure.
has_correlations <- is.data.frame(spearman_correlations) &&
  nrow(spearman_correlations) > 0

if (has_correlations) {
  ggplot(spearman_correlations, aes(y = spearman, x = replicate)) +
    geom_boxplot(aes(fill = replicate)) +
    # Dashed line at zero correlation.
    geom_hline(yintercept = 0, linetype = "dashed") +
    theme(axis.text.x = element_text(angle = 45)) +
    scale_fill_tableau(guide = "none") +
    facet_grid(plate_map_name ~ ccle_name, scales = "free") +
    labs(x = "Replicate", y = "Spearman Correlation")
} else {
  print("Unable to calculate correlations not enough doses")
}
```

# Heatmaps {.tabset .tabset-pills}
```{r, results='asis', include = T, fig.height=3, fig.width=8}
# One tab per cell line that has at least one finite luminescence reading.
# Within each tab, one heatmap per assay plate shows luminescence laid out by
# well row and column, with each cell annotated by the first letter of its
# perturbation type.
finite_lum_rows <- normalized_data %>%
  dplyr::filter(is.finite(lum))

for (cell_line in unique(finite_lum_rows$ccle_name)) {
  # Tab heading, written as raw markdown (the chunk uses results = 'asis').
  # The format has a single %s, so only the cell line name is passed.
  cat(sprintf("\n\n## %s {.tabset .tabset-pills}\n\n", cell_line))

  # Split the well position into a row (first character) and a column (rest).
  df <- normalized_data %>%
    dplyr::filter(ccle_name == cell_line) %>%
    dplyr::mutate(
      plate_row = str_sub(Well_Position, 1, 1),
      plate_col = str_sub(Well_Position, 2, -1)
    ) %>%
    dplyr::arrange(replicate)

  for (plate in unique(df$arp_barcode)) {
    plot_df <- df %>%
      dplyr::filter(arp_barcode == plate) %>%
      dplyr::distinct(replicate, plate_row, plate_col, lum, pert_type) %>%
      dplyr::arrange(plate_col, plate_row)

    # Nothing to draw for a plate without any luminescence value.
    if (all(is.na(plot_df$lum))) next

    plate_replicate <- unique(plot_df$replicate)

    # Row x column matrices of luminescence and of the perturbation-type letter.
    plot_mat <- plot_df %>%
      reshape2::acast(plate_row ~ plate_col, value.var = "lum")
    type_mat <- plot_df %>%
      dplyr::mutate(pert_type = str_sub(pert_type, 1, 1)) %>%
      reshape2::acast(plate_row ~ plate_col, value.var = "pert_type")

    # Column labels are strings, so unpadded labels would be laid out in
    # lexicographic order (1, 10, 11, ..., 2). Reorder them numerically.
    # Labels that are not numeric become NA, sort last, and keep their
    # existing relative order, so such plates are drawn as before.
    col_order <- order(suppressWarnings(as.numeric(colnames(plot_mat))))
    plot_mat <- plot_mat[, col_order, drop = FALSE]
    type_mat <- type_mat[, colnames(plot_mat), drop = FALSE]

    p <- ComplexHeatmap::Heatmap(
      plot_mat,
      cluster_rows = FALSE,
      cluster_columns = FALSE,
      heatmap_legend_param = list(title = "Luminescence"),
      row_title = paste(plate, plate_replicate),
      col = viridis(1000),
      # Write the perturbation-type letter into each heatmap cell.
      cell_fun = function(j, i, x, y, w, h, col) {
        grid.text(type_mat[i, j], x, y)
      }
    )

    # Each plate is drawn as its own figure. A stacked single-figure layout
    # (combining heatmaps with `%v%`) was previously sketched here and left
    # disabled.
    print(p)
    cat("\n\n")
  }
}
```