Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Remove for loop" #14

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@ Package: SpotSweeper
Title: Spatially-aware quality control for spatial transcriptomics
Version: 0.99.1
Date: 2023-10-30
Authors@R: c(
Authors@R:
person("Michael", "Totty", ,"[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9292-8556")),
person("Boyi", "Guo", ,email = "[email protected]", role = c("aut"),
comment = c(ORCID = "0000-0003-2950-2349"))
)

comment = c(ORCID = "0000-0002-9292-8556"))
Description: Spatially-aware quality control (QC) software for both spot-level
and artifact-level QC in spot-based spatial transcripomics, such as 10x
Visium. These methods calculate local (nearest-neighbors) mean and variance
Expand Down
70 changes: 20 additions & 50 deletions R/findArtifacts.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,11 @@ findArtifacts <- function(
features <- c(mito_percent, mito_sum)
features_to_use <- character()
if (log2) {
# for (feature in features) {
# feature_log2 <- paste0(feature, "_log2")
# colData(spe)[feature_log2] <- log2(colData(spe)[[feature]])
# features_to_use <- c(features_to_use, feature_log2)
# }
log_mat <- apply(
colData(spe)[, features],
MARGIN = 2,
FUN = log2
)
colnames(log_mat) <- paste0(colnames(log_mat), "_log2")
# Merge to spe
colData(spe)[colnames(log_mat)] <- log_mat
features_to_use <- colnames(log_mat)

for (feature in features) {
feature_log2 <- paste0(feature, "_log2")
colData(spe)[feature_log2] <- log2(colData(spe)[[feature]])
features_to_use <- c(features_to_use, feature_log2)
}
} else {
features_to_use <- features
}
Expand All @@ -90,43 +80,23 @@ findArtifacts <- function(
spe.temp <- spe[, colData(spe)[[samples]] == sample]

# ======= Calculate local mito variance ========

var_mat <- sapply(
seq_len(n_rings),
FUN = function(i){
var_matrix <- c()
for (i in seq_len(n_rings)) {
# ==== local mito variance ==== get n neighbors for i rings
n_neighbors <- 3 * i * (i + 1)
tmp.name <- paste0("k", n_neighbors)

spe.temp <- localVariance(spe.temp,
features = mito_percent,
n_neighbors = n_neighbors,
name = tmp.name
)

colData(spe.temp)[[tmp.name]]
# # add to var_matrix
# var_matrix <- cbind(var_matrix, colData(spe.temp)[[tmp.name]])
}
)

# var_matrix <- c()
# for (i in seq_len(n_rings)) {
# # ==== local mito variance ==== get n neighbors for i rings
# n_neighbors <- 3 * i * (i + 1)
# tmp.name <- paste0("k", n_neighbors)
#
# # get local variance of mito ratio
# spe.temp <- localVariance(spe.temp,
# features = mito_percent,
# n_neighbors = n_neighbors,
# name = tmp.name
# )
#
# # add to var_matrix
# var_matrix <- cbind(var_matrix, colData(spe.temp)[[tmp.name]])
# }

# browser()

# get local variance of mito ratio
spe.temp <- localVariance(spe.temp,
features = mito_percent,
n_neighbors = n_neighbors,
name = tmp.name
)

# add to var_matrix
var_matrix <- cbind(var_matrix, colData(spe.temp)[[tmp.name]])
}


# ========== PCA and clustering ========== add mito and mito_percent to
# var dataframe
Expand Down
47 changes: 10 additions & 37 deletions R/localOutliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,8 @@ localOutliers <- function(
# Get a list of unique sample IDs
unique_sample_ids <- unique(colData(spe)[[samples]])

# Initialize named list to store each spaQC dataframe
# spaQC_list <- vector("list", length(unique_sample_ids))
spaQC_list <- sapply(unique_sample_ids, FUN = function(x) NULL)
# Initialize list to store each spaQC dataframe
spaQC_list <- vector("list", length(unique_sample_ids))

# Loop through each unique sample ID
for (sample in unique_sample_ids) {
Expand All @@ -102,46 +101,20 @@ localOutliers <- function(
k = n_neighbors, warn.ties = FALSE
)$index

dnn_aug <- cbind(
seq_len(nrow(dnn)), # Index of the center spot
dnn
)
mod_z_matrix <- apply(
dnn_aug, MARGIN = 1,
FUN = function(.idx, spaQC, metric_to_use ){
# browser()
.idx <- .idx[.idx!=0]
neighborhood <- spaQC[.idx, metric_to_use] |> as.matrix()
return(
spatialEco::outliers(neighborhood)[1] # The spot among neighbors
)
},
spaQC = spaQC,
metric_to_use = metric_to_use
)

# browser()
# Initialize a matrix to store z-scores
# mod_z_matrix <- array(NA, nrow(dnn))
mod_z_matrix <- array(NA, nrow(dnn))

# Loop through each row in the nearest neighbor index matrix
# for (i in seq_len(nrow(dnn))) {
# dnn.idx <- dnn[i, ]
# neighborhood <- spaQC[c(i, dnn.idx[dnn.idx != 0]), ][[metric_to_use]]
#
# # modified z-score
# mod_z_matrix[i] <- spatialEco::outliers(neighborhood)[1]
# }
for (i in seq_len(nrow(dnn))) {
dnn.idx <- dnn[i, ]
neighborhood <- spaQC[c(i, dnn.idx[dnn.idx != 0]), ][[metric_to_use]]

# Handle non-finite values
if(any(!is.finite(mod_z_matrix))){
warning(
sample_id,
" contains infinite value mod_z_matrix. Forcing to be 0s."
)
mod_z_matrix[!is.finite(mod_z_matrix)] <- 0
# modified z-score
mod_z_matrix[i] <- spatialEco::outliers(neighborhood)[1]
}

# Handle non-finite values
mod_z_matrix[!is.finite(mod_z_matrix)] <- 0

# find outliers based on cutoff, store in colData
metric_outliers <- paste0(metric, "_outliers")
Expand Down
163 changes: 43 additions & 120 deletions R/localVariance.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,6 @@
#' n_neighbors = 36,
#' name = "local_mito_variance_k36"
#' )
#'
#' spe2 <- localVariance(spe,
#' features = c("subsets_Mito_percent", "total"),
#' n_neighbors = 36
#' )
#'
#' plotOutliers(spe, metric="local_mito_variance_k36")
#'
Expand All @@ -65,160 +60,88 @@ localVariance <- function(spe, n_neighbors = 36,
# log2 transform specified features
features_to_use <- character()
if (log2) {
log_mat <- apply(
colData(spe)[, features],
MARGIN = 2,
FUN = log2
)
colnames(log_mat) <- paste0(colnames(log_mat), "_log2")
# for (feature in features) {
# # feature_log2 <-
# colData(spe)[paste0(feature, "_log2")] <- log2(colData(spe)[[feature]])
# # features_to_use <- c(features_to_use, feature_log2)
# }
# features_to_use <- paste0(features, "_log2")

# Merge to spe
colData(spe)[colnames(log_mat)] <- log_mat
features_to_use <- colnames(log_mat)

for (feature in features) {
feature_log2 <- paste0(feature, "_log2")
colData(spe)[feature_log2] <- log2(colData(spe)[[feature]])
features_to_use <- c(features_to_use, feature_log2)
}
} else {
features_to_use <- features
}

if (!is.null(name)) {
stopifnot(
"name has different length as feature" =
length(names) == length(features_to_use))
}

# Get a list of unique sample IDs
unique_sample_ids <- unique(colData(spe)[[samples]])

# Initialize a named list equal the length of unique_sample_ids
# spaQC_list <- vector("list", length(unique_sample_ids))
spaQC_list <- sapply(unique_sample_ids, FUN = function(x) NULL)
# Initialize list to store each spaQC dataframe
spaQC_list <- vector("list", length(unique_sample_ids))

# Loop through each unique sample ID
# NOTE: use for loop to avoid copying any large spe object.
# for (sample_id in seq_along(unique_sample_ids)) {
for (.sample_id in unique_sample_ids) {
for (sample_id in seq_along(unique_sample_ids)) {
# Subset the data for the current sample
# sample <- unique_sample_ids[sample_id]
spe_subset <- subset(spe, , sample_id == .sample_id)
sample <- unique_sample_ids[sample_id]
spe_subset <- subset(spe, , sample_id == sample)

# Create a list of spatial coordinates and qc features
spaQC <- colData(spe_subset)
# NOTE: Doens't seem useful.
# spaQC$coords <- spatialCoords(spe_subset)
spaQC$coords <- spatialCoords(spe_subset)

# Find nearest neighbors
dnn <- BiocNeighbors::findKNN(spatialCoords(spe_subset),
k = n_neighbors,
warn.ties = FALSE
)$index

# === Get local variance ===
# Initialize a matrix to store variance for each feature
# var_matrix <- matrix(NA, nrow(spaQC), length(features_to_use))
# colnames(var_matrix) <- features_to_use
#
# mean_matrix <- matrix(NA, nrow(spaQC), length(features_to_use))
# colnames(mean_matrix) <- features_to_use

# for (i in seq_len(nrow(dnn))) {
# dnn.idx <- dnn[i, ]
# for (j in seq_along(features_to_use)) {
dnn_aug <- cbind(dnn, seq_len(nrow(dnn)))
mean_var_mat <- apply(dnn_aug, 1, FUN = function(.idx, spaQC, features_to_use ){
neighborhood <- spaQC[.idx, features_to_use, drop=FALSE] |> as.matrix()
means_vec <- colMeans(neighborhood, na.rm = TRUE)
var_vec <- colVars(neighborhood, na.rm = TRUE)

names(means_vec) <- paste0(names(means_vec), "_mean")
names(var_vec) <- paste0(names(var_vec), "_var")

c(means_vec, var_vec)

# [[features_to_use[j]]]
# var_matrix[i, j] <- var(neighborhood, na.rm = TRUE)[1]
# mean_matrix[i, j] <- mean(neighborhood, na.rm = TRUE)[1]
# # Loop through each row in the nearest neighbor index matrix
#
# neighborhood <- spaQC[c(i, dnn.idx[dnn.idx != 0]), ][[features_to_use[j]]]
#
# var_matrix[i, j] <- var(neighborhood, na.rm = TRUE)[1]
# mean_matrix[i, j] <- mean(neighborhood, na.rm = TRUE)[1]
# }
# }
},
spaQC = spaQC,
features_to_use = features_to_use ) |>
t() # Transpose to spots-by-features
var_matrix <- matrix(NA, nrow(spaQC), length(features_to_use))
colnames(var_matrix) <- features_to_use

# Handle non-finite values
# var_matrix[!is.finite(var_matrix)] <- 0
# mean_matrix[!is.finite(mean_matrix)] <- 0
if(any(!is.finite(mean_var_mat))){
warning(
.sample_id,
" contains infinite value in local vars. Forcing to be 0s."
)
# NOTE to micheal: I'm not sure if this would cause problem....
mean_var_mat[!is.finite(mean_var_mat)] <- 0
mean_matrix <- matrix(NA, nrow(spaQC), length(features_to_use))
colnames(mean_matrix) <- features_to_use

# Loop through each row in the nearest neighbor index matrix
for (i in seq_len(nrow(dnn))) {
dnn.idx <- dnn[i, ]
for (j in seq_along(features_to_use)) {
neighborhood <- spaQC[c(i, dnn.idx[dnn.idx != 0]), ][[features_to_use[j]]]

var_matrix[i, j] <- var(neighborhood, na.rm = TRUE)[1]
mean_matrix[i, j] <- mean(neighborhood, na.rm = TRUE)[1]
}
}


# Handle non-finite values
var_matrix[!is.finite(var_matrix)] <- 0
mean_matrix[!is.finite(mean_matrix)] <- 0

# == Regress out mean-variance bias ==
# for (feature_idx in seq_along(features_to_use)) {
var_matrix <- sapply(
features_to_use,
FUN = function(feature, mean_var_mat){

for (feature_idx in seq_along(features_to_use)) {
# Prepare data.frame for current feature
feat_var <- log2(mean_var_mat[, paste0(feature, "_var")])
feat_mean <- log2(mean_var_mat[, paste0(feature, "_mean")])
# Prepare data.frame for current feature
# mito_var_df <- data.frame(
# mito_var = log2(var_matrix[, feature_idx]),
# mito_mean = log2(mean_matrix[, feature_idx])
# )
mito_var_df <- data.frame(
mito_var = log2(var_matrix[, feature_idx]),
mito_mean = log2(mean_matrix[, feature_idx])
)

# Perform robust linear regression (IRLS) of variance vs mean
# fit.irls <- MASS::rlm(mito_var ~ mito_mean, data = mito_var_df)
fit.irls <- MASS::rlm(feat_var ~ feat_mean)
fit.irls <- MASS::rlm(mito_var ~ mito_mean, data = mito_var_df)

# Get residuals and update the variance matrix
resid.irls <- resid(fit.irls)

# Replace original variance values with residuals
resid.irls
},
mean_var_mat = mean_var_mat
)
var_matrix[, feature_idx] <- resid.irls
}

# add local variance to spaQC dataframe
# if (!is.null(name)) {
# stopifnot(
# "name has different length as feature" =
# length(names) == length(features_to_use))
# spaQC[name] <- var_matrix[, j]
# } else {
# feature_var <- paste0(features[j], "_var")
# spaQC[feature_var] <- var_matrix[, j]
# }

if (!is.null(name)) {
stopifnot(
"name has different length as feature" =
length(names) == length(features_to_use))
spaQC[name] <- var_matrix
spaQC[name] <- var_matrix[, j]
} else {
feature_var <- paste0(features, "_var")
colnames(var_matrix) <- feature_var
spaQC[feature_var] <- var_matrix
feature_var <- paste0(features[j], "_var")
spaQC[feature_var] <- var_matrix[, j]
}

# Store the modified spaQC dataframe in the list
spaQC_list[[.sample_id]] <- spaQC
spaQC_list[[sample_id]] <- spaQC
}

# rbind the list of dataframes
Expand Down
5 changes: 0 additions & 5 deletions man/localVariance.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading