Skip to content

Commit

Permalink
Merge pull request #78 from LieberInstitute/add-metric-qc-function
Browse files Browse the repository at this point in the history
Add metric QC function
  • Loading branch information
lcolladotor authored Jul 10, 2024
2 parents 30a8b3a + a2a013c commit ecf983d
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ Imports:
limma,
statmod,
MatrixGenerics,
rlang
rlang,
dplyr
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
URL: https://github.com/LieberInstitute/spatialLIBD
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(add10xVisiumAnalysis)
export(add_images)
export(add_key)
export(add_qc_metrics)
export(annotate_registered_clusters)
export(check_modeling_results)
export(check_sce)
Expand Down Expand Up @@ -83,8 +84,14 @@ importFrom(SummarizedExperiment,"rowRanges<-")
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,assayNames)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(benchmarkme,get_ram)
importFrom(cowplot,plot_grid)
importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(edgeR,calcNormFactors)
importFrom(edgeR,filterByExpr)
importFrom(fields,image.plot)
Expand Down Expand Up @@ -124,6 +131,7 @@ importFrom(methods,new)
importFrom(png,readPNG)
importFrom(rlang,arg_match)
importFrom(rtracklayer,import)
importFrom(scater,isOutlier)
importFrom(scater,plotReducedDim)
importFrom(scuttle,aggregateAcrossCells)
importFrom(sessioninfo,session_info)
Expand Down
144 changes: 144 additions & 0 deletions R/add_qc_metrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#' Quality Control for Spatial Data
#'
#' This function identify spots in a
#' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] (SPE)
#' with outlier quality control values: low `sum_umi` or `sum_gene`, or high
#' `expr_chrM_ratio`, utilizing `scran::isOutlier()`. Also identifies in-tissue
#' edge spots and distance to the edge for each spot.
#'
#' @param spe a [SpatialExperiment][SpatialExperiment::SpatialExperiment-class]
#'
#' @return A [SpatialExperiment][SpatialExperiment::SpatialExperiment-class]
#' with added quiality control information added to the colData.
#'
#' @export
#' @importFrom dplyr group_by summarize left_join select mutate
#' @importFrom SummarizedExperiment colData
#' @importFrom scater isOutlier
#'
#' @examples
#' if (enough_ram()) {
#' ## Obtain the necessary data
#' if (!exists("spe")) spe <- fetch_data("spe")
#'
#' ## fake out tissue spots in example data (TODO add pre-qc data)
#' spe_qc <- spe
#' spe_qc$in_tissue[spe_qc$array_col < 10] <- FALSE
#'
#' ## adds QC metrics to colData of the spe
#' spe_qc <- add_qc_metrics(spe_qc)
#' colData(spe_qc)
#'
#' ## visualize edge spots
#' vis_clus(spe_qc, sampleid = "151507", clustervar = "edge_spot")
#' vis_gene(spe_qc, sampleid = "151507", geneid = "edge_distance", minCount = -1)
#'
#' ## visualize scran QC flags
#'
#' vis_clus(spe_qc, sample_id = "151507", clustervar = "scran_low_lib_size")
#'
#' # scater::plotColData(spe_qc[, spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size")
#'
#' vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_discard")
#' vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_low_lib_size_edge")
#' }
#'
add_qc_metrics <- function(spe) {
stopifnot("in_tissue" %in% colnames(colData(spe)))
stopifnot("sum_umi" %in% colnames(colData(spe)))
stopifnot("sum_gene" %in% colnames(colData(spe)))
stopifnot("expr_chrM_ratio" %in% colnames(colData(spe)))

spe$in_tissue <- as.logical(spe$in_tissue)
spe_in <- spe[, spe$in_tissue]

## QC in-tissue spots

# define variables
low_lib_size <- low_n_features <- in_tissue <- sample_id <- NULL

qc_df <- data.frame(
log2sum = log2(spe_in$sum_umi),
log2detected = log2(spe_in$sum_gene),
subsets_Mito_percent = spe_in$expr_chrM_ratio * 100,
sample_id = spe_in$sample_id
)

qcfilter <- data.frame(
low_lib_size = scater::isOutlier(qc_df$log2sum, type = "lower", log = TRUE, batch = qc_df$sample_id),
low_n_features = scater::isOutlier(qc_df$log2detected, type = "lower", log = TRUE, batch = qc_df$sample_id),
high_subsets_Mito_percent = scater::isOutlier(qc_df$subsets_Mito_percent, type = "higher", batch = qc_df$sample_id)
) |>
dplyr::mutate(discard = (low_lib_size | low_n_features) | high_subsets_Mito_percent)

## Add qcfilter cols to colData(spe) after factoring
## discard
spe$scran_discard <- NA
spe$scran_discard[which(spe$in_tissue)] <- qcfilter$discard
spe$scran_discard <- factor(spe$scran_discard, levels = c("TRUE", "FALSE"))

## low_lib_size
spe$scran_low_lib_size <- NA
spe$scran_low_lib_size[which(spe$in_tissue)] <- qcfilter$low_lib_size
spe$scran_low_lib_size <- factor(spe$scran_low_lib_size,
levels = c("TRUE", "FALSE")
)
## low_n_features
spe$scran_low_n_features <- NA
spe$scran_low_n_features[which(spe$in_tissue)] <- qcfilter$low_n_features
spe$scran_low_n_features <- factor(spe$scran_low_n_features,
levels = c("TRUE", "FALSE")
)

## high mito percent
spe$scran_high_Mito_percent <- NA
spe$scran_high_Mito_percent[which(spe$in_tissue)] <-
qcfilter$high_subsets_Mito_percent
spe$scran_high_Mito_percent <-
factor(spe$scran_high_Mito_percent, levels = c("TRUE", "FALSE"))

## Find edge spots
# define variables
array_row <- array_col <- edge_row <- edge_col <- row_distance <- NULL
col_distance <- high_subsets_Mito_percent <- NULL

spot_coords <- colData(spe_in) |>
as.data.frame() |>
select(in_tissue, sample_id, array_row, array_col) |>
group_by(sample_id, array_row) |>
mutate(
edge_col = array_col == min(array_col) | array_col == max(array_col),
col_distance = pmin(
abs(array_col - min(array_col)),
abs(array_col - max(array_col))
)
) |>
group_by(sample_id, array_col) |>
mutate(
edge_row = array_row == min(array_row) | array_row == max(array_row),
row_distance = pmin(
abs(array_row - min(array_row)),
abs(array_row - max(array_row))
)
) |>
group_by(sample_id) |>
mutate(
edge_spot = edge_row | edge_col,
edge_distance = pmin(row_distance, col_distance)
)


## Add Edge info to spe
spe$edge_spot <- NA
spe$edge_spot[which(spe$in_tissue)] <- spot_coords$edge_spot
spe$edge_spot <- factor(spe$edge_spot, levels = c("TRUE", "FALSE"))

spe$edge_distance <- NA
spe$edge_distance[which(spe$in_tissue)] <- spot_coords$edge_distance

spe$scran_low_lib_size_edge <- NA
spe$scran_low_lib_size_edge[which(spe$in_tissue)] <- qcfilter$low_lib_size & spot_coords$edge_spot
spe$scran_low_lib_size_edge <- factor(spe$scran_low_lib_size_edge, levels = c("TRUE", "FALSE"))

return(spe)
}
50 changes: 50 additions & 0 deletions man/add_qc_metrics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-add_qc_metrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
test_that(
"add_qc_metrics returns modified spe",
{
if (!exists("spe")) spe <- fetch_data("spe")

# run metrics spe
spe_qc <- add_qc_metrics(spe)
expect_equal(ncol(spe), ncol(spe_qc)) ## same number of spots
expect_equal(ncol(colData(spe)) + 7, ncol(colData(spe_qc))) ## add 7 QC cols to colData
# [1] "scran_discard" "scran_low_lib_size" "scran_low_n_features"
# [4] "scran_high_subsets_Mito_percent" "edge_spot" "edge_distance"
# [7] "scran_low_lib_size_edge"
}
)

0 comments on commit ecf983d

Please sign in to comment.