Skip to content

Commit

Permalink
update reports
Browse files Browse the repository at this point in the history
Merge branch 'master' of github.com:susansjy22/HPCell

Conflicts:
	inst/rmd/Empty_droplet_report.Rmd
  • Loading branch information
susansjy22 committed Feb 23, 2024
2 parents 543ee98 + 13a4a25 commit b1cc940
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 89 deletions.
50 changes: 41 additions & 9 deletions R/execute_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,17 @@ run_targets_pipeline <- function(
tar_target(input_read, readRDS(read_file),
pattern = map(read_file),
iteration = "list", deployment = "main"),

tar_target(unique_tissues,
get_unique_tissues(input_read),
pattern = map(input_read),
iteration = "list", deployment = "main"
),
# tar_target(
# tissue_subsets,
# input_read, split.by = "Tissue"),
# pattern = map(input_read),
# iteration = "list"
# ),
tar_target(reference_read, reference_file, deployment = "main"),

# Identifying empty droplets
Expand Down Expand Up @@ -263,15 +273,37 @@ run_targets_pipeline <- function(
tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read),
pattern = map(input_read),
iteration = "list"),
# tar_target(empty_droplets_report, rmarkdown::render(
# input = "./inst/rmd/Empty_droplet_report.Rmd",
# output_file = store,
# params = list(x1 = input_read, x2 = empty_droplets_tbl)
# )),
# tar_render(
# name = Technical_variation_report, # The name of the target
# path = path_to_technical_variation_report,
# params = list(x1= input_read, x2= empty_droplets_tbl, x3 = annotation_label_transfer_tbl, x4 = unique_tissues)
# ),
#
# tar_render(
# name = empty_droplets_report, # The name of the target
# path = path,
# params = list(x1= input_read, x2= empty_droplets_tbl, x3 = annotation_label_transfer_tbl, x4 = unique_tissues)
# ),
tar_render(
name = doublet_identification_report,
path = paste0(system.file(package = "HPCell"), "/rmd/Doublet_identification_report.Rmd"),
params = list(x1 = input_read,
x2 = calc_UMAP_dbl_report,
x3 = doublet_identification_tbl,
x4 = annotation_label_transfer_tbl,
x5 = sample_column |> quo_name())
),
tar_render(
name = Technical_variation_report,
path = paste0(system.file(package = "HPCell"), "/rmd/Technical_variation_report.Rmd"),
params = list(x1 = tar_read(input_read, store = "/vast/scratch/users/si.j/store8"),
x2 = tar_read(empty_droplets_tbl, store = "/vast/scratch/users/si.j/store8")
)
),
tar_render(
name = empty_droplets_report, # The name of the target
path = "./inst/rmd/Empty_droplet_report.Rmd",
params = list(x1= input_read, x2= empty_droplets_tbl, x3 = annotation_label_transfer_tbl)
name = pseudobulk_processing_report,
path = paste0(system.file(package = "HPCell"), "/rmd/pseudobulk_analysis_report.Rmd"),
params = list(x1 = tar_read(pseudobulk_merge_all_samples, store = "/vast/scratch/users/si.j/store8"))
)
))
}, script = glue("{store}.R"), ask = FALSE)
Expand Down
6 changes: 6 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -528,3 +528,9 @@ calc_UMAP <- function(input_seurat){
RunUMAP(dims = 1:30, spread = 0.5,min.dist = 0.01, n.neighbors = 10L) |>
as_tibble()
}
#' Get the unique tissue labels stored in an object's metadata
#'
#' Reads the `Tissue` column of `seurat_object@meta.data` and returns its
#' distinct values. The object itself is not subset or split.
#'
#' @param seurat_object An object with a `meta.data` slot holding a data frame
#'   (presumably a Seurat object -- confirm against callers).
#' @return The distinct values of the `Tissue` metadata column, in order of
#'   first appearance, or `NULL` when no column is named exactly `Tissue`.
get_unique_tissues <- function(seurat_object) {
  # `[[` matches the column name exactly; `$` on a data frame would fall back
  # to partial matching and could pick up e.g. a `Tissue_type` column instead.
  unique(seurat_object@meta.data[["Tissue"]])
}
36 changes: 23 additions & 13 deletions inst/rmd/Doublet_identification_report.Rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: "Doublet_identification_report"
title: "Doublet identification report"
author: "SS"
date: "2023-12-05"
output: html_document
Expand All @@ -8,6 +8,7 @@ params:
x2: "NA"
x3: "NA"
x4: "NA"
x5: "NA"
---

```{r setup, include=FALSE}
Expand Down Expand Up @@ -52,14 +53,16 @@ get_labels_clusters = function(.data, label_column, dim1, dim2){
}
```

## Comprehensive UMAP Visualization of Cell Typing and Doublet Detection Across Tissue Samples
This visualization highlights the clustering of cell types and identifies singlets and doublets in the population. This allows for an exploration of similarities and differences in gene expression profiles between cells from different tissues.

```{r, out.width='100%', fig.width=15, fig.height=10, warning=FALSE, message=FALSE, echo=FALSE}
# Joining info and returning a list of tibbles
merged_combined_annotation_doublets <- list(
#x1,
x2,
x3,
x4
#params$x1,
params$x2,
params$x3,
params$x4
) |>
pmap(
~ ..1 |>
Expand Down Expand Up @@ -152,19 +155,26 @@ plot_merged_combined_annotation_doublets
# patchwork::wrap_elements() |>
# map(~ .x |>
# left_join(
# x4 |>
# params$x4 |>
# purrr::reduce(bind_rows), by=".cell"
# ) |>
#
# #join doublets identified
# left_join(
# x3 |>
# params$x3 |>
# purrr::reduce(bind_rows), by = c(".cell")
# )
# )
```

## Singlet and Doublet Cell Distributions Across Tissues
Each bar in the plot corresponds to a specific cell type within each tissue.

From this plot, we can infer:

1. The overall quality of the cell separation process in the sequencing data, indicated by the proportion of singlets to doublets.
2. Potential differences in the rate of doublet formation between cell types, which might be related to cell size and tissue type.

```{r, out.width='100%', fig.width=15, fig.height=10, warning=FALSE, message=FALSE, echo=FALSE}
# 2a) Create the composition of the doublets
Expand All @@ -176,22 +186,22 @@ doublet_composition<- merged_combined_annotation_doublets %>%
~ {
#browser()
.x|>
dplyr::select(sample_column, scDblFinder.class)}
dplyr::select(all_of(params$x5), scDblFinder.class)}
)) |>
#table()|>
dplyr::select(sample_name, doublet_composition) |>
deframe()
#calculate proportion and plot
merged_combined_annotation_doublets<- merged_combined_annotation_doublets |>
merged_combined_annotation_doublets <-
merged_combined_annotation_doublets |>
mutate(doublet_composition_plot = map(
annotated_metadata,
~ .x |>
~ .x |>
#browser() |>
# create frequency column
count(Tissue, Cell_type_in_each_tissue, scDblFinder.class, name= "count_class") |>
dplyr::count(Tissue, Cell_type_in_each_tissue, scDblFinder.class, name= "count_class") |>
group_by(Tissue, Cell_type_in_each_tissue) |>
mutate(proportion = count_class/sum(count_class)) |>
ungroup() |>
Expand Down
6 changes: 5 additions & 1 deletion inst/rmd/Empty_droplet_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ params:
x1: "NA"
x2: "NA"
x3: "NA"
x4: "NA"
---

```{r, warning=FALSE, message=FALSE, echo=FALSE}
Expand Down Expand Up @@ -42,6 +43,9 @@ library(magrittr)
library(qs)
library(S4Vectors)
# Subsetting tissues in input data
unique_tissues <- unique([email protected]$Tissue)
assay = params$x1[[1]]@assays |> names() |> extract2(1)
# Subset 2 tissues (sample types)
# heart <- subset(input_seurat, subset = Tissue == "Heart")
Expand Down Expand Up @@ -215,7 +219,7 @@ plot_hist <- hist_p_val(combined_df)
plot_hist
```

Percentage of reads assigned to mitochondrial transcrips against library size
## Percentage of reads assigned to mitochondrial transcripts against library size
```{r, warning=FALSE, message=FALSE, echo=FALSE}
plot_mito_data <- function(input_seurat, tissue_name, annotation_labels){
c = rownames(input_seurat) |> str_which("^MT")
Expand Down
Loading

0 comments on commit b1cc940

Please sign in to comment.