diff --git a/articles/getting_started.html b/articles/getting_started.html index 63eac9c..f128738 100644 --- a/articles/getting_started.html +++ b/articles/getting_started.html @@ -84,7 +84,7 @@

Michael Hicks

Johns Hopkins Bloomberg School of Public Health, Baltimore, MD, -USA

2024-02-15

+USA

2024-03-20

Source: vignettes/getting_started.Rmd
getting_started.Rmd
@@ -144,27 +144,28 @@

Spot-level local outlier detection#> [13] "total" # Identifying local outliers using SpotSweeper -features <- c('sum' ,'detected', "subsets_Mito_percent") -spe<- localOutliers(spe, - features=features, - n_neighbors=18, - data_output=TRUE, - method="multivariate" - ) +spe <- localOutliers(spe, + metric="sum", + direction="lower", + log=TRUE +) -# show column data after SpotSweeper -colnames(colData(spe)) -#> [1] "barcode_id" "sample_id" -#> [3] "in_tissue" "array_row" -#> [5] "array_col" "ground_truth" -#> [7] "cell_count" "sum" -#> [9] "detected" "subsets_Mito_sum" -#> [11] "subsets_Mito_detected" "subsets_Mito_percent" -#> [13] "total" "sum_log2" -#> [15] "detected_log2" "subsets_Mito_percent_log2" -#> [17] "coords" "local_outliers" -#> [19] "sum_z" "detected_z" -#> [21] "subsets_Mito_percent_z" "LOF" +spe <- localOutliers(spe, + metric="detected", + direction="lower", + log=TRUE +) + +spe <- localOutliers(spe, + metric="subsets_Mito_percent", + direction="higher", + log=FALSE +) + +# combine all outliers into "local_outliers" column +spe$local_outliers <- as.logical(spe$sum_outliers) | + as.logical(spe$detected_outliers) | + as.logical(spe$subsets_Mito_percent_outliers)

We can now visualize local_outliers vs one of the QC metrics, sum_log2, with help from the escheR package.

@@ -172,26 +173,31 @@

Spot-level local outlier detectionlibrary(escheR) library(ggpubr) -# plotting using escheR -p1 <- make_escheR(spe) |> - add_fill(var = "sum_log2", point_size=1.25) + - scale_fill_gradient(low ="white",high = "darkgreen") +# library size +p1 <- plotOutliers(spe, metric="sum_log2", + outliers="sum_outliers", point_size=1.1) + + ggtitle("Library Size") + +# unique genes +p2 <- plotOutliers(spe, metric="detected_log2", + outliers="detected_outliers", point_size=1.1) + + ggtitle("Unique Genes") + +# mitochondrial percent +p3 <- plotOutliers(spe, metric="subsets_Mito_percent", + outliers="subsets_Mito_percent_outliers", point_size=1.1) + + ggtitle("Mitochondrial Percent") -p2 <- make_escheR(spe) |> - add_fill(var = "sum_log2", point_size=1.25) |> - add_ground(var = "local_outliers", stroke = 1) + - scale_color_manual( - name = "", # turn off legend name for ground_truth - values = c( - "TRUE" = "red", - "FALSE" = "transparent") - ) + - scale_fill_gradient(low ="white",high = "darkgreen") +# all local outliers +p4 <- plotOutliers(spe, metric="sum_log2", + outliers="local_outliers", point_size=1.1, stroke=0.75) + + ggtitle("All Local Outliers") -plot_list <- list(p1, p2) +# plot +plot_list <- list(p1, p2, p3, p4) ggarrange( plotlist = plot_list, - ncol = 2, nrow = 1, + ncol = 2, nrow = 2, common.legend = FALSE )

@@ -201,9 +207,9 @@

Session information
 utils::sessionInfo()
-#> R version 4.3.2 (2023-10-31)
+#> R version 4.3.3 (2024-02-29)
 #> Platform: x86_64-pc-linux-gnu (64-bit)
-#> Running under: Ubuntu 22.04.3 LTS
+#> Running under: Ubuntu 22.04.4 LTS
 #> 
 #> Matrix products: default
 #> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
@@ -224,70 +230,69 @@ 

Session information#> #> other attached packages: #> [1] ggpubr_0.6.0 escheR_1.2.0 -#> [3] ggplot2_3.4.4 STexampleData_1.10.0 +#> [3] ggplot2_3.5.0 STexampleData_1.10.1 #> [5] SpatialExperiment_1.12.0 SingleCellExperiment_1.24.0 #> [7] SummarizedExperiment_1.32.0 Biobase_2.62.0 -#> [9] GenomicRanges_1.54.1 GenomeInfoDb_1.38.6 +#> [9] GenomicRanges_1.54.1 GenomeInfoDb_1.38.8 #> [11] IRanges_2.36.0 S4Vectors_0.40.2 #> [13] MatrixGenerics_1.14.0 matrixStats_1.2.0 #> [15] ExperimentHub_2.10.0 AnnotationHub_3.10.0 -#> [17] BiocFileCache_2.10.1 dbplyr_2.4.0 +#> [17] BiocFileCache_2.10.1 dbplyr_2.5.0 #> [19] BiocGenerics_0.48.1 SpotSweeper_0.99.1 #> #> loaded via a namespace (and not attached): -#> [1] DBI_1.2.1 bitops_1.0-7 +#> [1] DBI_1.2.2 bitops_1.0-7 #> [3] rlang_1.1.3 magrittr_2.0.3 -#> [5] compiler_4.3.2 RSQLite_2.3.5 +#> [5] compiler_4.3.3 RSQLite_2.3.5 #> [7] DelayedMatrixStats_1.24.0 png_0.1-8 -#> [9] systemfonts_1.0.5 vctrs_0.6.5 -#> [11] stringr_1.5.1 pkgconfig_2.0.3 -#> [13] crayon_1.5.2 fastmap_1.1.1 -#> [15] backports_1.4.1 magick_2.8.2 -#> [17] XVector_0.42.0 ellipsis_0.3.2 -#> [19] labeling_0.4.3 scuttle_1.12.0 -#> [21] utf8_1.2.4 promises_1.2.1 -#> [23] rmarkdown_2.25 ragg_1.2.7 -#> [25] purrr_1.0.2 bit_4.0.5 -#> [27] xfun_0.42 beachmat_2.18.0 -#> [29] zlibbioc_1.48.0 cachem_1.0.8 -#> [31] jsonlite_1.8.8 blob_1.2.4 -#> [33] highr_0.10 later_1.3.2 -#> [35] DelayedArray_0.28.0 BiocParallel_1.36.0 -#> [37] interactiveDisplayBase_1.40.0 broom_1.0.5 -#> [39] parallel_4.3.2 R6_2.5.1 -#> [41] bslib_0.6.1 stringi_1.8.3 -#> [43] car_3.1-2 jquerylib_0.1.4 -#> [45] Rcpp_1.0.12 knitr_1.45 -#> [47] httpuv_1.6.14 Matrix_1.6-1.1 -#> [49] tidyselect_1.2.0 abind_1.4-5 -#> [51] yaml_2.3.8 codetools_0.2-19 -#> [53] curl_5.2.0 lattice_0.21-9 -#> [55] tibble_3.2.1 shiny_1.8.0 -#> [57] withr_3.0.0 KEGGREST_1.42.0 -#> [59] evaluate_0.23 desc_1.4.3 -#> [61] Biostrings_2.70.2 pillar_1.9.0 -#> [63] BiocManager_1.30.22 filelock_1.0.3 -#> [65] carData_3.0-5 generics_0.1.3 -#> [67] 
dbscan_1.1-12 RCurl_1.98-1.14 -#> [69] BiocVersion_3.18.1 munsell_0.5.0 -#> [71] scales_1.3.0 sparseMatrixStats_1.14.0 -#> [73] xtable_1.8-4 glue_1.7.0 -#> [75] tools_4.3.2 BiocNeighbors_1.20.2 -#> [77] ggsignif_0.6.4 fs_1.6.3 -#> [79] cowplot_1.1.3 grid_4.3.2 -#> [81] tidyr_1.3.1 colorspace_2.1-0 -#> [83] AnnotationDbi_1.64.1 GenomeInfoDbData_1.2.11 -#> [85] cli_3.6.2 rappdirs_0.3.3 -#> [87] textshaping_0.3.7 fansi_1.0.6 -#> [89] viridisLite_0.4.2 S4Arrays_1.2.0 -#> [91] dplyr_1.1.4 gtable_0.3.4 -#> [93] rstatix_0.7.2 sass_0.4.8 -#> [95] digest_0.6.34 SparseArray_1.2.4 -#> [97] farver_2.1.1 rjson_0.2.21 -#> [99] memoise_2.0.1 htmltools_0.5.7 -#> [101] pkgdown_2.0.7 lifecycle_1.0.4 -#> [103] httr_1.4.7 mime_0.12 -#> [105] bit64_4.0.5 MASS_7.3-60

+#> [9] systemfonts_1.0.6 vctrs_0.6.5 +#> [11] pkgconfig_2.0.3 crayon_1.5.2 +#> [13] fastmap_1.1.1 backports_1.4.1 +#> [15] magick_2.8.3 XVector_0.42.0 +#> [17] ellipsis_0.3.2 labeling_0.4.3 +#> [19] scuttle_1.12.0 utf8_1.2.4 +#> [21] promises_1.2.1 rmarkdown_2.26 +#> [23] ragg_1.3.0 purrr_1.0.2 +#> [25] bit_4.0.5 xfun_0.42 +#> [27] beachmat_2.18.1 zlibbioc_1.48.2 +#> [29] cachem_1.0.8 jsonlite_1.8.8 +#> [31] blob_1.2.4 highr_0.10 +#> [33] later_1.3.2 DelayedArray_0.28.0 +#> [35] BiocParallel_1.36.0 interactiveDisplayBase_1.40.0 +#> [37] broom_1.0.5 parallel_4.3.3 +#> [39] R6_2.5.1 bslib_0.6.1 +#> [41] car_3.1-2 jquerylib_0.1.4 +#> [43] Rcpp_1.0.12 knitr_1.45 +#> [45] httpuv_1.6.14 Matrix_1.6-5 +#> [47] tidyselect_1.2.1 abind_1.4-5 +#> [49] yaml_2.3.8 codetools_0.2-19 +#> [51] curl_5.2.1 lattice_0.22-5 +#> [53] tibble_3.2.1 withr_3.0.0 +#> [55] KEGGREST_1.42.0 shiny_1.8.0 +#> [57] evaluate_0.23 desc_1.4.3 +#> [59] Biostrings_2.70.3 pillar_1.9.0 +#> [61] BiocManager_1.30.22 filelock_1.0.3 +#> [63] carData_3.0-5 generics_0.1.3 +#> [65] RCurl_1.98-1.14 BiocVersion_3.18.1 +#> [67] sparseMatrixStats_1.14.0 munsell_0.5.0 +#> [69] scales_1.3.0 xtable_1.8-4 +#> [71] glue_1.7.0 tools_4.3.3 +#> [73] BiocNeighbors_1.20.2 ggsignif_0.6.4 +#> [75] fs_1.6.3 cowplot_1.1.3 +#> [77] grid_4.3.3 tidyr_1.3.1 +#> [79] AnnotationDbi_1.64.1 colorspace_2.1-0 +#> [81] GenomeInfoDbData_1.2.11 cli_3.6.2 +#> [83] rappdirs_0.3.3 textshaping_0.3.7 +#> [85] fansi_1.0.6 viridisLite_0.4.2 +#> [87] S4Arrays_1.2.1 dplyr_1.1.4 +#> [89] gtable_0.3.4 rstatix_0.7.2 +#> [91] sass_0.4.9 digest_0.6.35 +#> [93] SparseArray_1.2.4 farver_2.1.1 +#> [95] rjson_0.2.21 memoise_2.0.1 +#> [97] htmltools_0.5.7 pkgdown_2.0.7 +#> [99] lifecycle_1.0.4 httr_1.4.7 +#> [101] mime_0.12 bit64_4.0.5 +#> [103] MASS_7.3-60.0.1 diff --git a/articles/getting_started_files/figure-html/local_outlier_plot-1.png b/articles/getting_started_files/figure-html/local_outlier_plot-1.png index de37f45..9581314 100644 Binary files 
a/articles/getting_started_files/figure-html/local_outlier_plot-1.png and b/articles/getting_started_files/figure-html/local_outlier_plot-1.png differ diff --git a/index.html b/index.html index dc7ffcc..b83ed1f 100644 --- a/index.html +++ b/index.html @@ -5,14 +5,26 @@ - -SpotSweeper • SpotSweeper + +Spatially-aware quality control for spatial transcriptomics • SpotSweeper - - + + localOutliers Function — localOutliers • SpotSweeperlocalOutliers Function — localOutliers • SpotSweeper @@ -54,78 +58,60 @@
-

This function detects local outliers based on k-nearest neighbors based on either a univariate -(z-score thresholds per QC metrics) or multivariate approach (Local Outlier Factor).

+

This function detects local outliers in spatial transcriptomics data based on standard +quality control metrics, such as library size, unique genes, and mitochondrial ratio. +Local outliers are defined as spots with low/high quality metrics compared to their +surrounding neighbors, based on a modified z-score statistic.

Usage

localOutliers(
   spe,
+  metric = "detected",
+  direction = "lower",
   n_neighbors = 36,
-  features = c("sum_umi", "sum_gene", "expr_chrM_ratio"),
-  method = "multivariate",
   samples = "sample_id",
-  log2 = TRUE,
-  cutoff = 2.58,
-  scale = TRUE,
-  minPts = 20,
-  data_output = FALSE,
-  n_cores = 1
+  log = TRUE,
+  cutoff = 3
 )

Arguments

spe
-

SpatialExperiment object with the following columns in colData: sample_id, sum_umi, sum_gene

+

SpatialExperiment object

-
n_neighbors
-

Number of nearest neighbors to use for outlier detection

+
metric
+

colData QC metric to use for outlier detection

-
features
-

Vector of features to use for outlier detection

+
direction
+

Direction of outlier detection (higher, lower, or both)

-
method
-

Method to use for outlier detection (univariate or multivariate)

+
n_neighbors
+

Number of nearest neighbors to use for outlier detection

samples

Column name in colData to use for sample IDs

-
log2
-

Logical indicating whether to log2 transform the features

+
log
+

Logical indicating whether to log2 transform the metric (default is TRUE)

cutoff
-

Cutoff for outlier detection

- - -
scale
-

Logical indicating whether to scale the features for LOF calculation (recommended)

- - -
minPts
-

Minimum number of points (nearest neighbors) to use for LOF calculation

- - -
data_output
-

Logical indicating whether to output the z-scores for each feature

- - -
n_cores
-

Number of cores to use for parallelization in the findKNN function

+

Cutoff for outlier detection (default is 3)

Value

-

SpatialExperiment object with updated colData

+

SpatialExperiment object with updated colData containing outputs

@@ -141,11 +127,6 @@

Examples# change from gene id to gene names rownames(spe) <- rowData(spe)$gene_name -# show column data before SpotSweepR -colnames(colData(spe)) -#> [1] "barcode_id" "sample_id" "in_tissue" "array_row" "array_col" -#> [6] "ground_truth" "cell_count" - # drop out-of-tissue spots spe <- spe[, spe$in_tissue == 1] spe <- spe[, !is.na(spe$ground_truth)] @@ -153,22 +134,12 @@

Examples# Identifying the mitochondrial transcripts in our SpatialExperiment. is.mito <- rownames(spe)[grepl("^MT-", rownames(spe))] -# Calculating QC metrics for each spot using scuttle -spe<- scuttle::addPerCellQCMetrics(spe, subsets=list(Mito=is.mito)) -colnames(colData(spe)) -#> [1] "barcode_id" "sample_id" "in_tissue" -#> [4] "array_row" "array_col" "ground_truth" -#> [7] "cell_count" "sum" "detected" -#> [10] "subsets_Mito_sum" "subsets_Mito_detected" "subsets_Mito_percent" -#> [13] "total" - +# Calculating QC features for each spot using scuttle +spe<- scuttle::addPerCellQC(spe, subsets=list(Mito=is.mito)) -features <- c('sum' ,'detected', "subsets_Mito_percent") spe<- localOutliers(spe, - features=features, - n_neighbors=36, - data_output=TRUE, - method="multivariate" + metric="detected", + direction="lower" )

diff --git a/reference/localVariance.html b/reference/localVariance.html index 21aa13e..c255937 100644 --- a/reference/localVariance.html +++ b/reference/localVariance.html @@ -146,6 +146,7 @@

Examples n_neighbors=36, name="local_mito_variance_k36" ) +#> Error in h(simpleError(msg, call)): error in evaluating the argument 'j' in selecting a method for function '[': comparison (==) is possible only for atomic and list types