update reports

Merge branch 'master' of github.com:susansjy22/HPCell Conflicts: inst/rmd/Empty_droplet_report.Rmd
susansjy22 · Feb 23, 2024 · b1cc940 · b1cc940
2 parents 543ee98 + 13a4a25
commit b1cc940
Show file tree

Hide file tree

Showing 5 changed files with 187 additions and 89 deletions.
diff --git a/R/execute_pipeline.R b/R/execute_pipeline.R
@@ -177,7 +177,17 @@ run_targets_pipeline <- function(
       tar_target(input_read, readRDS(read_file),
                  pattern = map(read_file),
                  iteration = "list", deployment = "main"),
-
+      tar_target(unique_tissues,
+                 get_unique_tissues(input_read),
+                 pattern = map(input_read),
+                 iteration = "list", deployment = "main"
+                 ),
+      # tar_target(
+      #   tissue_subsets,
+      #   input_read, split.by = "Tissue"), 
+      #   pattern = map(input_read),
+      #   iteration = "list"
+      # ),
       tar_target(reference_read, reference_file, deployment = "main"),
 
       # Identifying empty droplets
@@ -263,15 +273,37 @@ run_targets_pipeline <- function(
       tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read), 
                  pattern = map(input_read), 
                  iteration = "list"), 
-      # tar_target(empty_droplets_report, rmarkdown::render(
-      #   input = "./inst/rmd/Empty_droplet_report.Rmd",
-      #   output_file = store,
-      #   params = list(x1 = input_read, x2 = empty_droplets_tbl)
-      # )), 
+      # tar_render(
+      #   name = Technical_variation_report, # The name of the target
+      #   path = path_to_technical_variation_report,
+      #   params = list(x1= input_read, x2= empty_droplets_tbl, x3 = annotation_label_transfer_tbl, x4 = unique_tissues)
+      # ), 
+      # 
+      # tar_render(
+      #   name = empty_droplets_report, # The name of the target
+      #   path = path,
+      #   params = list(x1= input_read, x2= empty_droplets_tbl, x3 = annotation_label_transfer_tbl, x4 = unique_tissues)
+      # ), 
+      tar_render(
+        name = doublet_identification_report, 
+        path = paste0(system.file(package = "HPCell"), "/rmd/Doublet_identification_report.Rmd"), 
+        params = list(x1 = input_read, 
+                      x2 = calc_UMAP_dbl_report, 
+                      x3 = doublet_identification_tbl, 
+                      x4 = annotation_label_transfer_tbl, 
+                      x5 = sample_column |> quo_name())
+      ), 
+      tar_render(
+        name = Technical_variation_report,
+        path =  paste0(system.file(package = "HPCell"), "/rmd/Technical_variation_report.Rmd"),
+        params = list(x1 = tar_read(input_read, store = "/vast/scratch/users/si.j/store8"),
+                      x2 = tar_read(empty_droplets_tbl, store = "/vast/scratch/users/si.j/store8")
+        )
+      ),
       tar_render(
-        name = empty_droplets_report, # The name of the target
-        path = "./inst/rmd/Empty_droplet_report.Rmd", 
-        params = list(x1= input_read, x2= empty_droplets_tbl, x3 = annotation_label_transfer_tbl)
+        name = pseudobulk_processing_report, 
+        path = paste0(system.file(package = "HPCell"), "/rmd/pseudobulk_analysis_report.Rmd"), 
+        params = list(x1 = tar_read(pseudobulk_merge_all_samples, store = "/vast/scratch/users/si.j/store8"))
       )
       ))
   }, script = glue("{store}.R"), ask = FALSE)

diff --git a/R/utilities.R b/R/utilities.R
@@ -528,3 +528,9 @@ calc_UMAP <- function(input_seurat){
     RunUMAP(dims = 1:30, spread    = 0.5,min.dist  = 0.01, n.neighbors = 10L) |> 
     as_tibble()
 }
+#' Get the unique values of the `Tissue` metadata column
+#' @param seurat_object Object with a `meta.data` slot containing a `Tissue` column (presumably a Seurat object).
+#' @return `unique()` of that column; the object itself is not subset or split.
+get_unique_tissues <- function(seurat_object) {
+  unique(seurat_object@meta.data$Tissue)
+}
diff --git a/inst/rmd/Doublet_identification_report.Rmd b/inst/rmd/Doublet_identification_report.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "Doublet_identification_report"
+title: "Doublet identification report"
 author: "SS"
 date: "2023-12-05"
 output: html_document
@@ -8,6 +8,7 @@ params:
   x2: "NA"
   x3: "NA"
   x4: "NA"
+  x5: "NA"
 ---
 
 ```{r setup, include=FALSE}
@@ -52,14 +53,16 @@ get_labels_clusters = function(.data, label_column, dim1, dim2){
 }
 ```
 
+## Comprehensive UMAP Visualization of Cell Typing and Doublet Detection Across Tissue Samples
+This visualization highlights the clustering of cell types and identifies singlets and doublets in the population. This allows for an exploration of similarities and differences in gene expression profiles between cells from different tissues. 
 
 ```{r, out.width='100%', fig.width=15, fig.height=10, warning=FALSE, message=FALSE, echo=FALSE}
 # Joining info and returning a list of tibbles
 merged_combined_annotation_doublets <- list(
-  #x1,
-  x2,
-  x3,
-  x4
+  #params$x1,
+  params$x2,
+  params$x3,
+  params$x4
 ) |>
     pmap(
         ~ ..1 |>
@@ -152,19 +155,26 @@ plot_merged_combined_annotation_doublets
   # patchwork::wrap_elements() |> 
   # map(~ .x |>
   #        left_join(
-  #       x4 |>
+  #       params$x4 |>
   #       purrr::reduce(bind_rows), by=".cell"
   #     ) |>
   # 
   #     #join doublets identified
   #     left_join(
-  #         x3 |>
+  #         params$x3 |>
   #         purrr::reduce(bind_rows), by = c(".cell")
   #       )
   # )
 
 ```
 
+## Singlet and Doublet Cell Distributions Across Tissues 
+Each bar in the plot corresponds to a specific cell type within each tissue.
+
+From this plot, we can infer: 
+
+1. The overall quality of the cell separation process in the sequencing data, indicated by the proportion of singlets to doublets.
+2. Potential differences in the rate of doublet formation between cell types, which might be related to cell size and tissue type.
 
 ```{r, out.width='100%', fig.width=15, fig.height=10, warning=FALSE, message=FALSE, echo=FALSE}
 # 2a) Create the composition of the doublets
@@ -176,22 +186,22 @@ doublet_composition<- merged_combined_annotation_doublets %>%
     ~ {
       #browser() 
       .x|> 
-      dplyr::select(sample_column, scDblFinder.class)}
+      dplyr::select(all_of(params$x5), scDblFinder.class)}
       )) |> 
       #table()|> 
   dplyr::select(sample_name, doublet_composition) |> 
   deframe()
   
 
 #calculate proportion and plot
-  merged_combined_annotation_doublets<- merged_combined_annotation_doublets |>
+  merged_combined_annotation_doublets <- 
+    merged_combined_annotation_doublets |>
   mutate(doublet_composition_plot = map(
     annotated_metadata,
-    ~ .x |> 
-
+      ~ .x |> 
+      #browser() |> 
   # create frequency column
-    
-  count(Tissue, Cell_type_in_each_tissue, scDblFinder.class, name= "count_class") |> 
+  dplyr::count(Tissue, Cell_type_in_each_tissue, scDblFinder.class, name= "count_class") |> 
     group_by(Tissue, Cell_type_in_each_tissue) |> 
     mutate(proportion = count_class/sum(count_class)) |>
     ungroup() |>

diff --git a/inst/rmd/Empty_droplet_report.Rmd b/inst/rmd/Empty_droplet_report.Rmd
@@ -7,6 +7,7 @@ params:
   x1: "NA"
   x2: "NA"
   x3: "NA"
+  x4: "NA"
 ---
 
 ```{r,  warning=FALSE, message=FALSE, echo=FALSE}
@@ -42,6 +43,9 @@ library(magrittr)
 library(qs)
 library(S4Vectors)
 
+# Subsetting tissues in input data 
+unique_tissues <- unique(input_seurat@meta.data$Tissue)
+
 assay = params$x1[[1]]@assays |> names() |> extract2(1)
 # Subset 2 tissues (sample types)
 # heart <- subset(input_seurat, subset = Tissue == "Heart")
@@ -215,7 +219,7 @@ plot_hist <- hist_p_val(combined_df)
 plot_hist
 ```
 
-Percentage of reads assigned to mitochondrial transcrips against library size
+## Percentage of reads assigned to mitochondrial transcripts against library size
 ```{r,  warning=FALSE, message=FALSE, echo=FALSE}
 plot_mito_data <- function(input_seurat, tissue_name, annotation_labels){
   c = rownames(input_seurat) |> str_which("^MT")