Skip to content

Commit

Permalink
Merge pull request #285 from Qile0317/master
Browse files Browse the repository at this point in the history
Sync master branch with Qile's fork & import hash
  • Loading branch information
ncborcherding authored Dec 8, 2023
2 parents 28b4c40 + 63adabf commit 6f4d1c4
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 37 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ Imports:
tidygraph,
truncdist,
utils,
VGAM
VGAM,
hash
Suggests:
BiocManager,
BiocStyle,
Expand Down
4 changes: 2 additions & 2 deletions R/combineContigs.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ combineTCR <- function(input.data,
Con.df[Con.df == "NA_NA" | Con.df == "NA;NA_NA;NA"] <- NA
data3 <- merge(data2[,-which(names(data2) %in% c("TCR1","TCR2"))],
Con.df, by = "barcode")
if (!is.null(samples) & !is.null(ID)) {
if (!is.null(samples) && !is.null(ID)) {
data3 <- data3[, c("barcode", "sample", "ID", tcr1_lines, tcr2_lines,
CT_lines)] }
else if (!is.null(samples) & is.null(ID)) {
Expand All @@ -104,7 +104,7 @@ combineTCR <- function(input.data,
}
name_vector <- character(length(samples))
for (i in seq_along(samples)) {
if (!is.null(samples) & !is.null(ID)) {
if (!is.null(samples) && !is.null(ID)) {
curr <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) & is.null(ID)) {
curr <- paste(samples[i], sep="")
Expand Down
4 changes: 2 additions & 2 deletions R/combineExpression.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ combineExpression <- function(input.data,
call_time <- Sys.time()

options( dplyr.summarise.inform = FALSE )
if (!proportion & any(cloneSize < 1)) {
if (!proportion && any(cloneSize < 1)) {
stop("Adjust the cloneSize parameter - there are groupings < 1")
}
cloneSize <- c(None = 0, cloneSize)
Expand Down Expand Up @@ -93,7 +93,7 @@ combineExpression <- function(input.data,
"clonalFrequency")]
Con.df <- rbind.data.frame(Con.df, data)
}
} else if (group.by != "none" | !is.null(group.by)) {
} else if (group.by != "none" || !is.null(group.by)) {
data <- data.frame(bind_rows(input.data), stringsAsFactors = FALSE)
data2 <- na.omit(unique(data[,c("barcode", cloneCall, group.by)]))
data2 <- data2[data2[,"barcode"] %in% cell.names, ]
Expand Down
76 changes: 59 additions & 17 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ is_seurat_or_se_object <- function(obj) {
is_seurat_object(obj) || is_se_object(obj)
}

#Use to shuffle between chains
#Use to shuffle between chains Qile: the NA handling here *might* be related to the unnamed combineTCR bugs from the new rcpp con.df construction
#' @keywords internal
#' @author Ye-Lin Son Nick Borcherding
.off.the.chain <- function(dat, chain, cloneCall) {
Expand Down Expand Up @@ -250,23 +250,10 @@ is_seurat_or_se_object <- function(obj) {
return(data1)
}


# This is to help sort the type of clonotype data to use
#' @keywords internal
.theCall <- function(df, x, check.df = TRUE) {
x <- switch(x,
"gene" = "CTgene",
"genes" = "CTgene",
"CTgene" = "CTgene",
"nt" = "CTnt",
"nucleotides" = "CTnt",
"CTnt" = "CTnt",
"aa" = "CTaa",
"amino" = "CTaa",
"CTaa" = "CTaa",
"strict" = "CTstrict",
"gene+nt" = "CTstrict",
"CTstrict" = "CTstrict")
x <- .convertClonecall(x)
if(check.df) {
if(inherits(df, "list") & !any(colnames(df[[1]]) %in% x)) {
stop("Check the clonal variabe (cloneCall) being used in the function, it does not appear in the data provided.")
Expand All @@ -277,15 +264,70 @@ is_seurat_or_se_object <- function(obj) {
return(x)
}

# helper for .theCall
.convertClonecall <- function(x) {
  # Translate a user-supplied cloneCall alias into the canonical clonotype
  # column name ("CTgene", "CTnt", "CTaa" or "CTstrict"). Matching is
  # case-insensitive. An unrecognised alias raises an error that suggests the
  # closest known spelling.

  # The value is used as a single hash key below, so reject anything that is
  # not exactly one non-missing string up front with a readable message.
  if (!is.character(x) || length(x) != 1L || is.na(x)) {
    stop("cloneCall must be a single character string")
  }

  # Keys are lowercase because the input is lowercased before the lookup.
  clonecall_dictionary <- hash::hash(
    "gene" = "CTgene",
    "genes" = "CTgene",
    "ctgene" = "CTgene",
    "nt" = "CTnt",
    "nucleotide" = "CTnt",
    "nucleotides" = "CTnt",
    "ctnt" = "CTnt",
    "aa" = "CTaa",
    "amino" = "CTaa",
    "ctaa" = "CTaa",
    "gene+nt" = "CTstrict",
    "strict" = "CTstrict",
    "ctstrict" = "CTstrict"
  )

  x <- tolower(x)

  canonical <- clonecall_dictionary[[x]]
  if (!is.null(canonical)) {
    return(canonical)
  }

  # Offer both the aliases and the canonical names as suggestions.
  candidates <- c(
    names(clonecall_dictionary),
    unname(hash::values(clonecall_dictionary))
  )
  stop(paste0(
    "invalid input cloneCall, did you mean: '",
    closest_word(x, candidates),
    "'?"
  ))
}

# helper for .convertClonecall
closest_word <- function(s, strset) {
  # Return the element of `strset` closest to `s`, comparing both sides in
  # lowercase with the edit distance computed by utils::adist().
  #
  # s:      the string to match.
  # strset: character vector of candidates.
  #
  # The winner is returned exactly as it is spelled in `strset`, and the
  # earliest candidate wins a tie. An empty `strset` gives NA_character_.
  if (length(strset) == 0L) {
    return(NA_character_)
  }

  # adist() is vectorised over the candidates, so no index loop is needed and
  # a one-element `strset` works too. which.min() picks the first minimum.
  distances <- utils::adist(tolower(s), tolower(strset))
  strset[which.min(distances)]
}

# Assigning positions for TCR contig data
# Used to be .parseTCR(Con.df, unique_df, data2)
# Used to be .parseTCR(Con.df, unique_df, data2) in v1
# but now also constructs Con.df and runs the parseTCR algorithm on it, all in Rcpp
#' @author Gloria Kraus, Nick Bormann, Nicky de Vrij, Nick Borcherding, Qile Yang
#' @keywords internal
.constructConDfAndParseTCR <- function(data2) {
  # Hand the contig table to the compiled routine in two pieces: the rows
  # sorted by chain and then cdr3_nt, and the distinct values of the first
  # column.
  rcppConstructConDfAndParseTCR(
    data2 %>% arrange(., chain, cdr3_nt),
    unique(data2[[1]]) # 1 is the index of the barcode column
  )
}

Expand Down
2 changes: 1 addition & 1 deletion src/constructConDfAndparseTCR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class TcrParser {
TcrParser(
Rcpp::DataFrame& data2, std::vector<std::string>& uniqueData2Barcodes
) {
// construct conDf
// construct conDf, initializing the matrix to "NA" *strings*
conDf = scRepHelper::initStringMatrix(
7, uniqueData2Barcodes.size(), "NA"
);
Expand Down
20 changes: 11 additions & 9 deletions src/ntKmers.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// 2-bit-based nucleotide kmer counting
// 2-bit-based nucleotide kmer counting - unoptimized
// could use a kmercounter class with an uint_fast64_t[128] for toNtIndex instead of the switch statement
// by Qile Yang

#include <Rcpp.h>
Expand All @@ -15,13 +16,10 @@ inline unsigned short int toNtIndex(const char nt) {
}
}

// Lookup table from a 2-bit nucleotide code (0..3) to its character.
constexpr char Nts[4] = {'A', 'C', 'G', 'T'};

// Returns the nucleotide character encoded in the two lowest bits of `index`.
// Higher bits are masked off, so any index value is safe to pass.
inline char lastNt(unsigned int index) {
    return Nts[index & 3];
}

inline std::string toNtKmer(unsigned long int index, int k) {
Expand Down Expand Up @@ -58,6 +56,11 @@ inline void updateSkip(int& skip, const char c, const int k) {
}
}

// Passes `skip`, `c` and `k` to updateSkip(), which takes `skip` by reference,
// then reports whether `skip` is zero afterwards. kmerCount uses a true
// result as the signal to count the current k-mer.
inline bool updateSkipAndReturnIfShouldntSkip(int& skip, const char c, const int k) {
    updateSkip(skip, c, k);
    const bool shouldCount = (skip == 0);
    return shouldCount;
}

// actual kmer counter - doesnt handle _NA_ for k = 1
inline void kmerCount(std::vector<double>& bins, const unsigned int mask, const std::string& seq, const int k) {

Expand All @@ -76,8 +79,7 @@ inline void kmerCount(std::vector<double>& bins, const unsigned int mask, const

for (int i = k - 1; i < n; i++) {
kmer = ((kmer << 2) & mask) | toNtIndex(seq[i]);
updateSkip(skip, seq[i], k);
if (skip == 0) {
if (updateSkipAndReturnIfShouldntSkip(skip, seq[i], k)) {
bins[kmer]++;
}
}
Expand Down
9 changes: 4 additions & 5 deletions tests/testthat/test-combineContigs.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,12 @@ test_that("combineTCR works", {

# TODO combineTCR & combineBCR (need more edge cases, different args, errors, etc.)


test_that("combineBCR works", {

BCR <- read.csv("https://www.borch.dev/uploads/contigs/b_contigs.csv")
trial1 <- combineBCR(BCR,
samples = "Patient1")

expect_identical(trial1, getdata("combineContigs", "combineBCR_list_expected"))
})

})

0 comments on commit 6f4d1c4

Please sign in to comment.