Merge pull request #372 from ncborcherding/dev

v2.0.4
BorchLab · May 20, 2024 · 9f6ef41 · 9f6ef41
2 parents c5e0f3c + 2d9861f
commit 9f6ef41
Show file tree

Hide file tree

Showing 88 changed files with 14,364 additions and 1,102 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: scRepertoire
 Title: A toolkit for single-cell immune receptor profiling
-Version: 2.0.3
+Version: 2.0.4
 Authors@R: c(
     person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "[email protected]"), 
     person(given = "Qile", family = "Yang", role = c("aut"), email = "[email protected]"), 

diff --git a/NAMESPACE b/NAMESPACE
@@ -103,11 +103,12 @@ importFrom(methods,slot)
 importFrom(plyr,join)
 importFrom(plyr,llply)
 importFrom(quantreg,rqss)
-importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
 importFrom(rjson,fromJSON)
+importFrom(rlang,"!!")
 importFrom(rlang,"%||%")
 importFrom(rlang,":=")
+importFrom(rlang,ensym)
 importFrom(rlang,sym)
 importFrom(stats,as.dist)
 importFrom(stats,hclust)
@@ -121,6 +122,7 @@ importFrom(stats,sd)
 importFrom(stats,setNames)
 importFrom(stringdist,stringdist)
 importFrom(stringr,str_c)
+importFrom(stringr,str_remove_all)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_replace_na)
 importFrom(stringr,str_sort)

diff --git a/NEWS.md b/NEWS.md
@@ -1,9 +1,22 @@
+# scRepertoire VERSION 2.0.4
+
+## UNDERLYING CHANGES
+* ```getCirclize()``` refactored to prevent assumptions and added **include.self** argument
+* Added ```.clone.counter()``` internal function for ```getCirclize()``` and ```clonalNetwork()```
+* Added **order.by** parameter to visualizations to set the plotting order explicitly, using either a vector giving the specific order or "alphanumeric" to plot groups in alphanumeric order
+* Fix issue with ```clonalLength()``` and NA handling
+* ```clonalCompare()``` now retains the original clonal info if using **relabel.clones**
+* Added Dandelion support to ```loadContigs()``` and testthat
+* Fixed issue with ```positionalProperty()``` assumption that the clones will all have 20 amino acids.
+* Fixed IGH/K/L gene mix-up issue in ```vizGenes()```
+
+
 # scRepertoire VERSION 2.0.3
 
 ## UNDERLYING CHANGES
 
 * Modified support for Omniscope format to allow for dual chains
-* Added ParseBio support int ```loadContigs()``` and testthat
+* Added ParseBio support to ```loadContigs()``` and testthat
 * Added support for productive variable to ```loadContigs()``` for BD, Omniscope, and Immcantation formats
 * Replace numerical indexing with name indexing for ```loadContigs()```
 * ```combineBCR()``` and ```combineTCR()``` now allow for unproductive contig inclusions with new **filterNonproductive** parameter.

diff --git a/R/clonalAbundance.R b/R/clonalAbundance.R
@@ -26,6 +26,8 @@
 #' @param chain indicate if both or a specific chain should be used - 
 #' e.g. "both", "TRA", "TRG", "IGH", "IGL"
 #' @param group.by The variable to use for grouping
+#' @param order.by A vector of specific plotting order or "alphanumeric"
+#' to plot groups in order
 #' @param scale Converts the graphs into density plots in order to show 
 #' relative distributions.
 #' @param exportTable Returns the data frame used for forming the graph
@@ -42,7 +44,8 @@ clonalAbundance <- function(input.data,
                             chain = "both", 
                             scale=FALSE, 
                             group.by = NULL, 
-                            exportTable = FALSE, 
+                            order.by = NULL,
+                            exportTable = FALSE,
                             palette = "inferno") {
   Con.df <- NULL
   xlab <- "Abundance"
@@ -58,11 +61,17 @@ clonalAbundance <- function(input.data,
       data1 <- .parseContigs(input.data, i, names, cloneCall)
       label <- input.data[[i]][1,group.by]
       data1[,paste(group.by)] <- label
-      Con.df<- rbind.data.frame(Con.df, data1) }
-      Con.df <- data.frame(Con.df)
-      col <- length(unique(Con.df[,group.by]))
-      fill <- group.by
-      if (scale == TRUE) { 
+      Con.df<- rbind.data.frame(Con.df, data1) 
+    }
+    Con.df <- data.frame(Con.df)
+    col <- length(unique(Con.df[,group.by]))
+    fill <- group.by
+    if(!is.null(order.by)) {
+        Con.df <- .ordering.function(vector = order.by,
+                                     group.by = group.by, 
+                                     data.frame =  Con.df)
+    }
+    if (scale == TRUE) { 
         ylab <- "Density of Clones"
         plot <- ggplot(Con.df, aes(x=Abundance, fill=Con.df[,group.by])) +
                       geom_density(aes(y=after_stat(scaled)), 
@@ -86,7 +95,12 @@ clonalAbundance <- function(input.data,
       Con.df<- rbind.data.frame(Con.df, data1) 
     }
     Con.df <- data.frame(Con.df)
-    Con.df$values <- factor(Con.df$values, levels=names(input.data))
+    if(!is.null(order.by)) {
+      Con.df <- .ordering.function(vector = order.by,
+                                   group.by = "values", 
+                                   data.frame = Con.df)
+    }
+
     col <- length(unique(Con.df$values))
     fill <- "Samples"
     if (scale == TRUE) { 

diff --git a/R/clonalCompare.R b/R/clonalCompare.R
@@ -22,25 +22,27 @@
 #' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), 
 #' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}),
 #' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable 
-#' in the data. 
+#' in the data
 #' @param chain indicate if both or a specific chain should be used - 
-#' e.g. "both", "TRA", "TRG", "IGH", "IGL".
+#' e.g. "both", "TRA", "TRG", "IGH", "IGL"
 #' @param samples The specific samples to isolate for visualization.
-#' @param clones The specific clonal sequences of interest.
+#' @param clones The specific clonal sequences of interest
 #' @param top.clones The top number of clonal sequences per group.
 #' (e.g., top.clones = 5)
 #' @param highlight.clones Clonal sequences to highlight, if present, 
-#' all other clones returned will be grey.
+#' all other clones returned will be grey
 #' @param relabel.clones Simplify the legend of the graph by returning
-#' clones that are numerically indexed.
+#' clones that are numerically indexed
 #' @param group.by If using a single-cell object, the column header
 #' to group the new list. \strong{NULL} will return the active 
-#' identity or cluster.
+#' identity or cluster
+#' @param order.by A vector of specific plotting order or "alphanumeric"
+#' to plot groups in order
 #' @param graph The type of graph produced, either \strong{"alluvial"} 
-#' or \strong{"area"}.
-#' @param exportTable Returns the data frame used for forming the graph.
+#' or \strong{"area"}
+#' @param exportTable Returns the data frame used for forming the graph
 #' @param palette Colors to use in visualization - input any 
-#' \link[grDevices]{hcl.pals}.
+#' \link[grDevices]{hcl.pals}
 #' @import ggplot2
 #' @importFrom stringr str_sort
 #'
@@ -56,7 +58,8 @@ clonalCompare <- function(input.data,
                           top.clones = NULL,
                           highlight.clones = NULL,
                           relabel.clones = FALSE,
-                          group.by = NULL, 
+                          group.by = NULL,
+                          order.by = NULL,
                           graph = "alluvial", 
                           exportTable = FALSE, 
                           palette = "inferno"){
@@ -95,7 +98,7 @@ clonalCompare <- function(input.data,
     Con.df <- Con.df[Con.df$clones %in% top$clones,] 
   }
   if (nrow(Con.df) < length(unique(Con.df$Sample))) {
-    stop("Reasses the filtering strategies here, there are not 
+    stop("Please reasses the filtering strategies here, there are not 
             enough clones to examine.") 
   }
   #Clones relabeling
@@ -107,6 +110,7 @@ clonalCompare <- function(input.data,
     if(!is.null(highlight.clones)) {
       highlight.clones <- unname(new.clones[which(names(new.clones) %in% highlight.clones)])
     }
+    Con.df[,"original.clones"] <- Con.df[,"clones"]
     Con.df[,"clones"] <- new.clones[as.vector(Con.df[,"clones"])]
     Con.df[,"clones"] <- factor(Con.df[,"clones"], 
                                 levels = str_sort(unique(Con.df[,"clones"]), numeric = TRUE))
@@ -116,6 +120,13 @@ clonalCompare <- function(input.data,
     return(Con.df)
   }
 
+  if(!is.null(order.by)) {
+    Con.df <- .ordering.function(vector = order.by,
+                                 group.by = "Sample", 
+                                 data.frame = Con.df)
+  }
+
+
   #Plotting Functions
   plot <- ggplot(Con.df, aes(x = Sample, 
                              fill = clones, 

diff --git a/R/clonalDiversity.R b/R/clonalDiversity.R
@@ -49,19 +49,21 @@
 #' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), 
 #' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}),
 #' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable 
-#' in the data. 
+#' in the data
 #' @param chain indicate if both or a specific chain should be used - 
-#' e.g. "both", "TRA", "TRG", "IGH", "IGL".
-#' @param group.by Variable in which to combine for the diversity calculation.
-#' @param x.axis Additional variable grouping that will space the sample along the x-axis.
+#' e.g. "both", "TRA", "TRG", "IGH", "IGL"
+#' @param group.by Variable in which to combine for the diversity calculation
+#' @param order.by A vector of specific plotting order or "alphanumeric"
+#' to plot groups in order
+#' @param x.axis Additional variable grouping that will space the sample along the x-axis
 #' @param metrics The indices to use in diversity calculations - 
-#' "shannon", "inv.simpson", "norm.entropy", "gini.simpson", "chao1", "ACE".
+#' "shannon", "inv.simpson", "norm.entropy", "gini.simpson", "chao1", "ACE"
 #' @param exportTable Exports a table of the data into the global environment 
-#' in addition to the visualization.
+#' in addition to the visualization
 #' @param palette Colors to use in visualization - input any 
-#' \link[grDevices]{hcl.pals}.
+#' \link[grDevices]{hcl.pals}
 #' @param n.boots number of bootstraps to down sample in order to 
-#' get mean diversity.
+#' get mean diversity
 #' @param return.boots export boot strapped values calculated - 
 #' will automatically exportTable = TRUE.
 #' @param skip.boots remove down sampling and boot strapping from the calculation.
@@ -77,6 +79,7 @@ clonalDiversity <- function(input.data,
                             cloneCall = "strict", 
                             chain = "both",
                             group.by = NULL, 
+                            order.by = NULL,
                             x.axis = NULL, 
                             metrics = c("shannon", "inv.simpson", "norm.entropy", "gini.simpson", "chao1", "ACE"),
                             exportTable = FALSE, 
@@ -151,6 +154,12 @@ clonalDiversity <- function(input.data,
     rownames(mat) <- names(input.data)
 
     mat_melt <- suppressMessages(melt(mat, id.vars = c(group.by, x.axis)))
+    if(!is.null(order.by)) {
+      mat_melt <- .ordering.function(vector = order.by,
+                                     group.by = names(mat_melt)[1], 
+                                     mat_melt)
+    }
+
     if (x.axis == "x.axis") {
         plot <- ggplot(mat_melt, aes(x=1, y=as.numeric(value)))
     } else {

diff --git a/R/clonalHomeostasis.R b/R/clonalHomeostasis.R
@@ -23,7 +23,9 @@
 #' in the data. 
 #' @param chain indicate if both or a specific chain should be used - 
 #' e.g. "both", "TRA", "TRG", "IGH", "IGL".
-#' @param group.by The variable to use for grouping.
+#' @param group.by The variable to use for grouping
+#' @param order.by A vector of specific plotting order or "alphanumeric"
+#' to plot groups in order
 #' @param exportTable Exports a table of the data into the global 
 #' environment in addition to the visualization.
 #' @param palette Colors to use in visualization - input any 
@@ -40,6 +42,7 @@ clonalHomeostasis <- function(input.data,
                               cloneCall = "strict", 
                               chain = "both", 
                               group.by = NULL,
+                              order.by = NULL,
                               exportTable = FALSE, 
                               palette = "inferno") {
     cloneSize <- c(None = 0, cloneSize)
@@ -75,6 +78,14 @@ clonalHomeostasis <- function(input.data,
 
     #Plotting
     mat_melt <- melt(mat)
+
+    if(!is.null(order.by)) {
+      mat_melt <- .ordering.function(vector = order.by,
+                                     group.by = "Var1", 
+                                     data.frame = mat_melt)
+    }
+
+
     col <- length(unique(mat_melt$Var2))
     plot <- ggplot(mat_melt, aes(x=as.factor(Var1), y=value, fill=Var2)) +
         geom_bar(stat = "identity", position="fill", 

diff --git a/R/clonalLength.R b/R/clonalLength.R
@@ -16,17 +16,19 @@
 #' clonalLength(combined, cloneCall="aa", chain = "both")
 #'
 #' @param input.data The product of \code{\link{combineTCR}}, 
-#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}.
+#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}
 #' @param cloneCall How to call the clone - CDR3 nucleotide (\strong{nt}) 
-#' or CDR3 amino acid (\strong{aa}).
-#' @param group.by The variable to use for grouping.
+#' or CDR3 amino acid (\strong{aa})
+#' @param group.by The variable to use for grouping
+#' @param order.by A vector of specific plotting order or "alphanumeric"
+#' to plot groups in order
 #' @param scale Converts the graphs into density plots in order to show 
 #' relative distributions.
 #' @param chain indicate if both or a specific chain should be used - 
-#' e.g. "both", "TRA", "TRG", "IGH", "IGL".
+#' e.g. "both", "TRA", "TRG", "IGH", "IGL"
 #' @param exportTable Returns the data frame used for forming the graph.
 #' @param palette Colors to use in visualization - input any 
-#' \link[grDevices]{hcl.pals}.
+#' \link[grDevices]{hcl.pals}
 #' @importFrom stringr str_split
 #' @importFrom ggplot2 ggplot
 #' @export
@@ -37,6 +39,7 @@ clonalLength <- function(input.data,
                          cloneCall = "aa", 
                          chain = "both", 
                          group.by = NULL, 
+                         order.by = NULL,
                          scale = FALSE, 
                          exportTable = FALSE, 
                          palette = "inferno") {
@@ -80,6 +83,18 @@ clonalLength <- function(input.data,
     return(Con.df) 
   }
 
+  if(!is.null(order.by)) {
+    if (!is.null(group.by)) { 
+      Con.df <- .ordering.function(vector = order.by,
+                                   group.by = group.by, 
+                                   data.frame = Con.df)
+    } else {
+      Con.df <- .ordering.function(vector = order.by,
+                                   group.by = "values", 
+                                   data.frame = Con.df)
+    }
+  }
+
   #Plotting
   if (!is.null(group.by)) { 
     fill <- group.by

diff --git a/R/clonalNetwork.R b/R/clonalNetwork.R
@@ -90,14 +90,9 @@ clonalNetwork <- function(sc.data,
         }
         #Filtering clones based on the minimum value
         min_val <- min(min)
-        table <- meta %>%
-          group_by(across(all_of(c(group.by, cloneCall)))) %>%
-          count() %>%
-          na.omit() %>%
-          arrange(desc(n)) %>%
-          mutate(cumSum = cumsum(n))
-        cut <- which.min(abs(table$cumSum - min_val))
-        clones.to.filter <- table$group.by[seq_len(cut)]
+        table <- .clone.counter(meta, group.by, cloneCall)
+        cut <- which.min(abs(table$clone.sum - min_val))
+        clones.to.filter <- table[,1][seq_len(cut)]
       } else if (is.numeric(filter.clones)) {
           #Filtering based on a numeric value
           table <- meta %>%
@@ -114,18 +109,16 @@ clonalNetwork <- function(sc.data,
 
     if(exportClones) {
       #Summarizing all the clones by group.by
-      table <- meta %>%
-        group_by(meta[,group.by], meta[, cloneCall]) %>%
-        dplyr::count() %>%
-        na.omit() %>%
-        arrange(desc(n))
+      table <- .clone.counter(meta, group.by, cloneCall)[,seq_len(3)]
       #Identifying the clones across the group by
-      clones.across.identities <- names(which(table(table[[2]]) > 1))
+      clones.across.identities <- names(which(table(table[,2]) > 1))
+      if(length(clones.across.identities) < 1) {
+        stop("No shared clones across group.by variables for the current parameters selected")
+      }
       #Getting the clones to output
-      subset.table <- as.data.frame(table)
-      subset.table <- subset.table[subset.table[,2] %in% clones.across.identities,]
-      colnames(subset.table) <- c("id", "clone", "n")
-      dupl.clones <- subset.table %>%
+      table <- table[table[,2] %in% clones.across.identities,]
+      colnames(table) <- c("id", "clone", "n")
+      dupl.clones <- table %>%
                       group_by(clone) %>%
                       summarise(sum = sum(n))%>%
                       arrange(desc(sum))