Skip to content

Commit

Permalink
re-allow samples = NULL in combineBCR
Browse files Browse the repository at this point in the history
  • Loading branch information
Qile0317 committed Nov 26, 2024
1 parent df1f3f7 commit 9811879
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 78 deletions.
159 changes: 86 additions & 73 deletions R/combineContigs.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Adding Global Variables
# data('v_gene','j_gene', 'c_gene', 'd_gene')
# Note: the Rcpp internals currently hard-code these column names, so any
# breaking change made here requires either updating the Rcpp code or
# adjusting the functions to take the expected column names from these
# variables.
utils::globalVariables(c("v_gene", "j_gene", "c_gene", "d_gene", "chain"))

heavy_lines <- c("IGH", "cdr3_aa1", "cdr3_nt1", "vgene1")
Expand All @@ -21,42 +25,42 @@ utils::globalVariables(c(
#' @title Combining the list of T cell receptor contigs into clones
#'
#' @description This function consolidates a list of TCR sequencing results to
#' the level of the individual cell barcodes. Using the **samples** and
#' **ID** parameters, the function will add the strings as prefixes to
#' prevent issues with repeated barcodes. The resulting new barcodes will
#' need to match the Seurat or SCE object in order to use,
#' [combineExpression()]. Several levels of filtering exist -
#' *removeNA*, *removeMulti*, or *filterMulti* are parameters
#' that control how the function deals with barcodes with multiple chains
#' the level of the individual cell barcodes. Using the **samples** and
#' **ID** parameters, the function will add the strings as prefixes to
#' prevent issues with repeated barcodes. The resulting new barcodes will
#' need to match the Seurat or SCE object in order to use,
#' [combineExpression()]. Several levels of filtering exist -
#' *removeNA*, *removeMulti*, or *filterMulti* are parameters
#' that control how the function deals with barcodes with multiple chains
#' recovered.
#'
#'
#' @examples
#' combined <- combineTCR(contig_list,
#' samples = c("P17B", "P17L", "P18B", "P18L",
#' combined <- combineTCR(contig_list,
#' samples = c("P17B", "P17L", "P18B", "P18L",
#' "P19B","P19L", "P20B", "P20L"))
#'
#' @param input.data List of filtered contig annotations or
#'
#' @param input.data List of filtered contig annotations or
#' outputs from [loadContigs()].
#' @param samples The labels of samples (recommended).
#' @param ID The additional sample labeling (optional).
#' @param removeNA This will remove any chain without values.
#' @param removeMulti This will remove barcodes with greater than 2 chains.
#' @param filterMulti This option will allow for the selection of the 2
#' corresponding chains with the highest expression for a single barcode.
#' @param filterNonproductive This option will allow for the removal of
#' @param filterMulti This option will allow for the selection of the 2
#' corresponding chains with the highest expression for a single barcode.
#' @param filterNonproductive This option will allow for the removal of
#' nonproductive chains if the variable exists in the contig data. Default
#' is set to TRUE to remove nonproductive contigs.
#'
#'
#' @importFrom assertthat assert_that is.flag
#' @export
#' @concept Loading_and_Processing_Contigs
#' @return List of clones for individual cell barcodes
#'
combineTCR <- function(input.data,
samples = NULL,
ID = NULL,
removeNA = FALSE,
removeMulti = FALSE,
#'
combineTCR <- function(input.data,
samples = NULL,
ID = NULL,
removeNA = FALSE,
removeMulti = FALSE,
filterMulti = FALSE,
filterNonproductive = TRUE) {

Expand All @@ -66,7 +70,7 @@ combineTCR <- function(input.data,
assert_that(is.flag(removeNA))
assert_that(is.flag(removeMulti))
assert_that(is.flag(filterMulti))

input.data <- .checkList(input.data)
input.data <- .checkContigs(input.data)
out <- NULL
Expand All @@ -80,8 +84,8 @@ combineTCR <- function(input.data,
}
input.data[[i]]$sample <- samples[i]
input.data[[i]]$ID <- ID[i]
if (filterMulti) {
input.data[[i]] <- .filteringMulti(input.data[[i]])
if (filterMulti) {
input.data[[i]] <- .filteringMulti(input.data[[i]])
}
}
#Prevents error caused by list containing elements with 0 rows
Expand All @@ -104,10 +108,10 @@ combineTCR <- function(input.data,
data2 <- .makeGenes(cellType = "T", out[[i]])
Con.df <- .constructConDfAndParseTCR(data2)
Con.df <- .assignCT(cellType = "T", Con.df)
Con.df[Con.df == "NA_NA" | Con.df == "NA;NA_NA;NA"] <- NA
data3 <- merge(data2[,-which(names(data2) %in% c("TCR1","TCR2"))],
Con.df[Con.df == "NA_NA" | Con.df == "NA;NA_NA;NA"] <- NA
data3 <- merge(data2[,-which(names(data2) %in% c("TCR1","TCR2"))],
Con.df, by = "barcode")

columns_to_include <- c("barcode")
# Conditionally add columns based on user input
if (!is.null(samples)) {
Expand All @@ -116,17 +120,17 @@ combineTCR <- function(input.data,
if (!is.null(ID)) {
columns_to_include <- c(columns_to_include, "ID")
}

# Add TCR and CT lines which are presumably always needed
columns_to_include <- c(columns_to_include, tcr1_lines, tcr2_lines, CT_lines)

# Subset the data frame based on the dynamically built list of columns
data3 <- data3[, columns_to_include]
final[[i]] <- data3

final[[i]] <- data3
}
name_vector <- character(length(samples))
for (i in seq_along(samples)) {
for (i in seq_along(samples)) {
if (!is.null(samples) && !is.null(ID)) {
curr <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) & is.null(ID)) {
Expand Down Expand Up @@ -155,61 +159,66 @@ combineTCR <- function(input.data,

#' Combining the list of B cell receptor contigs into clones
#'
#' This function consolidates a list of BCR sequencing results to the level
#' of the individual cell barcodes. Using the samples and ID parameters,
#' the function will add the strings as prefixes to prevent issues with
#' repeated barcodes. The resulting new barcodes will need to match the
#' Seurat or SCE object in order to use, [combineExpression()].
#' Unlike [combineTCR()], combineBCR produces a column
#' **CTstrict** of an index of nucleotide sequence and the
#' corresponding V gene. This index automatically calculates the
#' This function consolidates a list of BCR sequencing results to the level
#' of the individual cell barcodes. Using the samples and ID parameters,
#' the function will add the strings as prefixes to prevent issues with
#' repeated barcodes. The resulting new barcodes will need to match the
#' Seurat or SCE object in order to use, [combineExpression()].
#' Unlike [combineTCR()], combineBCR produces a column
#' **CTstrict** of an index of nucleotide sequence and the
#' corresponding V gene. This index automatically calculates the
#' Levenshtein distance between sequences with the same V gene and will
#' index sequences using a normalized Levenshtein distance with the same
#' ID. After which, clone clusters are called using the
#' [igraph::components()] function. Clones that are clustered
#' across multiple sequences will then be labeled with "Cluster" in the
#' index sequences using a normalized Levenshtein distance with the same
#' ID. After which, clone clusters are called using the
#' [igraph::components()] function. Clones that are clustered
#' across multiple sequences will then be labeled with "Cluster" in the
#' CTstrict header.
#'
#' @examples
#' #Data derived from the 10x Genomics intratumoral NSCLC B cells
#' BCR <- read.csv("https://www.borch.dev/uploads/contigs/b_contigs.csv")
#' combined <- combineBCR(BCR,
#' samples = "Patient1",
#' combined <- combineBCR(BCR,
#' samples = "Patient1",
#' threshold = 0.85)
#'
#' @param input.data List of filtered contig annotations or outputs from
#'
#' @param input.data List of filtered contig annotations or outputs from
#' [loadContigs()].
#' @param samples The labels of samples (optional; may be `NULL`).
#' @param ID The additional sample labeling (optional).
#' @param call.related.clones Use the nucleotide sequence and V gene
#' to call related clones. Default is set to TRUE. FALSE will return
#' @param call.related.clones Use the nucleotide sequence and V gene
#' to call related clones. Default is set to TRUE. FALSE will return
#' a CTstrict or strict clone as V gene + amino acid sequence.
#' @param threshold The normalized edit distance to consider. The higher
#' @param threshold The normalized edit distance to consider. The higher
#' the number, the more similar sequences must be in order to be clustered.
#' @param removeNA This will remove any chain without values.
#' @param removeMulti This will remove barcodes with greater than 2 chains.
#' @param filterMulti This option will allow for the selection of the
#' @param filterMulti This option will allow for the selection of the
#' highest-expressing light and heavy chains, if not calling related clones.
#' @param filterNonproductive This option will allow for the removal of
#' @param filterNonproductive This option will allow for the removal of
#' nonproductive chains if the variable exists in the contig data. Default
#' is set to TRUE to remove nonproductive contigs.
#'
#' @export
#' @concept Loading_and_Processing_Contigs
#' @return List of clones for individual cell barcodes
combineBCR <- function(input.data,
samples,
samples = NULL,
ID = NULL,
call.related.clones = TRUE,
threshold = 0.85,
removeNA = FALSE,
removeNA = FALSE,
removeMulti = FALSE,
filterMulti = TRUE,
filterNonproductive = TRUE) {

if (is.null(samples)) {
stop("combineBCR() requires the samples parameter for the calculation of edit distance.")
}

assert_that(
isListOfNonEmptyDataFrames(input.data) || isNonEmptyDataFrame(input.data),
is.character(samples),
isListOfNonEmptyDataFrames(input.data) ||
isNonEmptyDataFrame(input.data),
is.character(samples) || is.null(samples),
is.flag(call.related.clones),
is.numeric(threshold),
is.flag(removeNA),
Expand All @@ -220,7 +229,8 @@ combineBCR <- function(input.data,
final <- input.data %>%
.checkList() %>%
.checkContigs() %>%
lapply(function(x) {
unname() %>%
purrr::imap(function(x, i) {
x <- subset(x, chain %in% c("IGH", "IGK", "IGL"))
if (!is.null(ID)) x$ID <- ID[i]
if (filterNonproductive && "productive" %in% colnames(x)) {
Expand All @@ -230,15 +240,21 @@ combineBCR <- function(input.data,
# Keep IGH / IGK / IGL info in save_chain
x$save_chain <- x$chain
# Collapse IGK and IGL chains
x$chain <- ifelse(x$chain=="IGH","IGH","IGLC")
x$chain <- ifelse(x$chain == "IGH", "IGH", "IGLC")
x <- .filteringMulti(x)
# Get back IGK / IGL distinction
x$chain <- x$save_chain
x$save_chain <- NULL
}
x
}) %>%
.modifyBarcodes(samples, ID) %>%
(function(x) {
if (!is.null(samples)) {
.modifyBarcodes(x, samples, ID)
} else { # https://github.com/ncborcherding/scRepertoire/pull/450
x
}
}) %>%
lapply(function(x) {
data2 <- data.frame(x)
data2 <- .makeGenes(cellType = "B", data2)
Expand Down Expand Up @@ -270,26 +286,23 @@ combineBCR <- function(input.data,
}
final[[i]]$sample <- samples[i]
final[[i]]$ID <- ID[i]
final[[i]][final[[i]] == "NA_NA" | final[[i]] == "NA;NA_NA;NA"] <- NA
final[[i]][final[[i]] == "NA_NA" | final[[i]] == "NA;NA_NA;NA"] <- NA
if (!is.null(sample) & !is.null(ID)) {
final[[i]]<- final[[i]][, c("barcode", "sample", "ID",
final[[i]]<- final[[i]][, c("barcode", "sample", "ID",
heavy_lines[c(1,2,3)], light_lines[c(1,2,3)], CT_lines)]
}
else if (!is.null(sample) & is.null(ID)) {
final[[i]]<- final[[i]][, c("barcode", "sample",
final[[i]]<- final[[i]][, c("barcode", "sample",
heavy_lines[c(1,2,3)], light_lines[c(1,2,3)], CT_lines)]
}
}
names <- NULL
for (i in seq_along(samples)) {
if (!is.null(samples) && !is.null(ID)) {
c <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) && is.null(ID)) {
c <- paste(samples[i], sep = "")
}
names <- c(names, c)

names(final) <- if (!is.null(samples)) {
if (is.null(ID)) samples else paste0(samples, "_", ID)
} else {
paste0("S", seq_along(final))
}
names(final) <- names

for (i in seq_along(final)) {
final[[i]] <- final[[i]][!duplicated(final[[i]]$barcode),]
final[[i]] <- final[[i]][rowSums(is.na(final[[i]])) < 10, ]
Expand Down
6 changes: 3 additions & 3 deletions man/combineBCR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/combineTCR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9811879

Please sign in to comment.