From 57a2f6b3fde7ea591afcea248781318f5de30a4a Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 16:13:57 -0700
Subject: [PATCH 01/11] add to .gitignore

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 7163266c..2fc51a59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,6 @@ local_tests.R
 docs
 vignettes/articles/scRep_example_full.rds
 .vscode
-qile
\ No newline at end of file
+qile
+dev
+.lintr
\ No newline at end of file

From 69325adeb2b2d0c39f89c5bc58e2fd5863d35819 Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 16:19:13 -0700
Subject: [PATCH 02/11] rm trailing whitespace in loadContigs

---
 R/loadContigs.R               | 80 +++++++++++++++++------------------
 man/clonalAbundance.Rd        | 32 +++++++-------
 man/clonalSizeDistribution.Rd | 24 +++++------
 man/percentAA.Rd              | 10 ++---
 man/positionalEntropy.Rd      | 22 +++++-----
 man/positionalProperty.Rd     | 16 +++----
 man/vizGenes.Rd               | 24 +++++------
 7 files changed, 104 insertions(+), 104 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index 469c9f3f..06119d87 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -1,65 +1,65 @@
 #' Loading the contigs derived from single-cell sequencing
 #'
-#' This function generates a contig list and formats the data to allow for 
-#' function with  \code{\link{combineTCR}} or \code{\link{combineBCR}}. If 
-#' using data derived from filtered outputs of 10X Genomics, there is no 
-#' need to use this function as the data is already compatible. 
-#' 
-#' The files that this function parses includes:  
+#' This function generates a contig list and formats the data to allow for
+#' function with  \code{\link{combineTCR}} or \code{\link{combineBCR}}. If
+#' using data derived from filtered outputs of 10X Genomics, there is no
+#' need to use this function as the data is already compatible.
+#'
+#' The files that this function parses include:
 #' \itemize{
 #'   \item 10X =  "filtered_contig_annotations.csv"
-#'   \item AIRR = "airr_rearrangement.tsv" 
-#'   \item BD = "Contigs_AIRR.tsv" 
-#'   \item Dandelion = "all_contig_dandelion.tsv" 
-#'   \item Immcantation = "data.tsv" 
+#'   \item AIRR = "airr_rearrangement.tsv"
+#'   \item BD = "Contigs_AIRR.tsv"
+#'   \item Dandelion = "all_contig_dandelion.tsv"
+#'   \item Immcantation = "data.tsv"
 #'   \item JSON = ".json"
 #'   \item ParseBio = "barcode_report.tsv"
 #'   \item MiXCR = "clones.tsv"
-#'   \item Omniscope = ".csv" 
+#'   \item Omniscope = ".csv"
 #'   \item TRUST4 = "barcode_report.tsv"
-#'   \item WAT3R = "barcode_results.csv" 
+#'   \item WAT3R = "barcode_results.csv"
 #' }
-#' 
+#'
 #' @examples
 #' TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv")
 #' contig.list <- loadContigs(TRUST4, format = "TRUST4")
-#' 
+#'
 #' BD <- read.csv("https://www.borch.dev/uploads/contigs/BD_contigs.csv")
 #' contig.list <- loadContigs(BD, format = "BD")
-#' 
+#'
 #' WAT3R <- read.csv("https://www.borch.dev/uploads/contigs/WAT3R_contigs.csv")
 #' contig.list <- loadContigs(WAT3R, format = "WAT3R")
-#' 
+#'
 #' @param input The directory in which contigs are located or a list with contig elements
-#' @param format The format of the single-cell contig, currently supporting: 
+#' @param format The format of the single-cell contig, currently supporting:
 #' "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope", "TRUST4", and "WAT3R"
 #' @importFrom utils read.csv read.delim
 #' @importFrom rjson fromJSON
 #' @export
 #' @concept Loading_and_Processing_Contigs
-#' @return List of contigs for compatibility  with \code{\link{combineTCR}} or 
+#' @return List of contigs for compatibility  with \code{\link{combineTCR}} or
 #' \code{\link{combineBCR}}
-loadContigs <- function(input, 
+loadContigs <- function(input,
                         format = "10X") {
   #Loading from directory, recursively
   if (inherits(x=input, what ="character")) {
-    format.list <- list("WAT3R" = "barcode_results.csv", 
-                        "10X" =  "filtered_contig_annotations.csv", 
-                        "AIRR" = "airr_rearrangement.tsv", 
+    format.list <- list("WAT3R" = "barcode_results.csv",
+                        "10X" =  "filtered_contig_annotations.csv",
+                        "AIRR" = "airr_rearrangement.tsv",
                         "Dandelion" = "all_contig_dandelion.tsv",
                         "Immcantation" = "_data.tsv",
-                        "MiXCR" = "clones.tsv", 
+                        "MiXCR" = "clones.tsv",
                         "JSON" = ".json",
-                        "TRUST4" = "barcode_report.tsv", 
+                        "TRUST4" = "barcode_report.tsv",
                         "BD" = "Contigs_AIRR.tsv",
                         "Omniscope" =c("_OSB.csv", "_OST.csv"),
                         "ParseBio" = "barcode_report.tsv")
         file.pattern <- format.list[[format]]
         contig.files <- list.files(input, paste0(file.pattern, collapse = "|"), recursive = TRUE, full.names = TRUE)
-        
+       
         if (format %in% c("10X", "WAT3R", "Omniscope")) {
-          df <- lapply(contig.files, read.csv) 
-        } else if(format %in% c("json")) { 
+          df <- lapply(contig.files, read.csv)
+        } else if(format %in% c("json")) {
           df <- lapply(contig.files, function(x) {
             tmp <- as.data.frame(fromJSON(x))
           })
@@ -70,7 +70,7 @@ loadContigs <- function(input,
   } else if (inherits(x=input, what ="list") | inherits(x=input, what ="data.frame")) {
     df <- .checkList(input)
   }
-  
+ 
   loadFunc <- switch(format,
                      "10X" = .parse10x,
                      "AIRR" = .parseAIRR,
@@ -84,7 +84,7 @@ loadContigs <- function(input,
                      "Immcantation" = .parseImmcantation,
                      "ParseBio" = .parseParse,
                       stop("Invalid format provided"))
-  
+ 
   df <- loadFunc(df)
   return(df)
 }
@@ -95,7 +95,7 @@ loadContigs <- function(input,
     for (i in seq_along(df)) {
         colnames(df[[i]])[1] <- "barcode"
         df[[i]][df[[i]] == "*"] <- NA
-        
+       
         if(length(which(is.na(df[[i]]$chain1))) == length(df[[i]]$chain1)) {
           chain2 <- matrix(ncol = 7, nrow = length(df[[i]]$chain1))
         } else {
@@ -104,7 +104,7 @@ loadContigs <- function(input,
         }
         colnames(chain2) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
         chain2 <- data.frame(barcode = df[[i]][,1], chain2)
-        
+       
         if(length(which(is.na(df[[i]]$chain2))) == length(df[[i]]$chain2)) {
           chain1 <- matrix(ncol = 7, nrow = length(df[[i]]$chain2))
         } else {
@@ -129,14 +129,14 @@ loadContigs <- function(input,
         chain2 <- df[[i]][,c("BC","TRBV","TRBD","TRBJ","TRB_CDR3nuc","TRB_CDR3","TRB_nReads","TRB_CDR3_UMIcount")]
         chain2 <- data.frame(chain2[,1], chain = "TRB", chain2[,2:4], c_gene = NA, chain2[,5:8])
         colnames(chain2) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis")
-        
+       
         #TRA Chain 1
         chain1 <-  df[[i]][,c("BC","TRAV","TRAJ","TRA_CDR3nuc","TRA_CDR3","TRA_nReads","TRA_CDR3_UMIcount")]
         chain1 <- data.frame(chain1[,1], chain = "TRA",chain1[,2], d_gene = NA, chain1[,3], c_gene = NA, chain1[,4:7])
         colnames(chain1) <- c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis")
         data2 <- rbind(chain1, chain2)
         data2[data2 == ""] <- NA
-        
+       
         #TRA Chain 2
         chain3 <-  df[[i]][,c("BC","TRAV.2","TRAJ.2","TRA.2_CDR3nuc","TRA.2_CDR3","TRA.2_nReads","TRA.2_CDR3_UMIcount")]
         chain3 <- data.frame(chain3[,1], chain = "TRA",chain3[,2],  d_gene = NA, chain3[,3], c_gene = NA, chain3[,4:7])
@@ -145,7 +145,7 @@ loadContigs <- function(input,
         data2[data2 == ""] <- NA
         df[[i]] <- data2
         df[[i]] <- df[[i]][with(df[[i]], order(reads, chain)),]
-        
+       
     }
     return(df)
 }
@@ -167,7 +167,7 @@ loadContigs <- function(input,
         df[[i]] <- subset(df[[i]], productive %in% c(TRUE, "TRUE", "True", "true"))
         if (nrow(df[[i]]) == 0) { stop(
             "There are 0 contigs after internal filtering -
-            check the contig list to see if any issues exist 
+            check the contig list to see if any issues exist
             for productive chains", call. = FALSE) }
         df[[i]] <- subset(df[[i]], cdr3 != "None")
         df[[i]][df[[i]] == ""] <- NA
@@ -191,7 +191,7 @@ loadContigs <- function(input,
   }
   return(df)
 }
-    
+   
 
 .parseOmniscope <- function(df) {
   for (i in seq_along(df)) {
@@ -259,20 +259,20 @@ loadContigs <- function(input,
     colnames(TRA.2) <- 1:8
     TRA <- rbind(TRA.1, TRA.2)
     TRA$chain <- "TRA"
-    
+   
     TRB.1 <- df[[i]][,c("Barcode", "TRB_V", "TRB_D", "TRB_J", "TRB_C", "TRB_cdr3_aa", "TRB_read_count", "TRB_transcript_count")]
     TRB.2 <- df[[i]][,c("Barcode", "secondary_TRB_V", "secondary_TRB_D", "secondary_TRB_J", "secondary_TRB_C", "secondary_TRB_cdr3_aa", "secondary_TRB_read_count", "secondary_TRB_transcript_count")]
     colnames(TRB.1) <- 1:8
     colnames(TRB.2) <- 1:8
     TRB <- rbind(TRB.1, TRB.2)
     TRB$chain <- "TRB"
-    
+   
     data2 <- rbind(TRA, TRB)
     data2 <- data2[rowSums(is.na(data2[2:8])) != 7, ]
     colnames(data2) <- c("barcode", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3", "reads", "umis", "chain")
     data2$cdr3_nt <- NA
     data2 <- data2[,c("barcode", "chain", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads", "umis")]
-    
+   
     df[[i]] <- data2
     df[[i]] <- df[[i]][with(df[[i]], order(reads, chain)),]
   }
@@ -283,6 +283,6 @@ loadContigs <- function(input,
   for (i in seq_along(df)) {
     df[[i]] <- df[[i]][,c("cell_id", "locus", "consensus_count", "v_call", "d_call", "j_call", "c_call", "cdr3", "cdr3_aa", "productive")]
     colnames(df[[i]]) <- c("barcode", "chain", "reads", "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "productive")
-  } 
+  }
   return(df)
 }
\ No newline at end of file
diff --git a/man/clonalAbundance.Rd b/man/clonalAbundance.Rd
index 52352220..671b04ba 100644
--- a/man/clonalAbundance.Rd
+++ b/man/clonalAbundance.Rd
@@ -16,18 +16,18 @@ clonalAbundance(
 )
 }
 \arguments{
-\item{input.data}{The product of \code{\link{combineTCR}}, 
+\item{input.data}{The product of \code{\link{combineTCR}},
 \code{\link{combineBCR}}, or \code{\link{combineExpression}}.}
 
-\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), 
+\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}),
 CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}),
-VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable 
+VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable
 in the data.}
 
-\item{chain}{indicate if both or a specific chain should be used - 
+\item{chain}{indicate if both or a specific chain should be used -
 e.g. "both", "TRA", "TRG", "IGH", "IGL"}
 
-\item{scale}{Converts the graphs into density plots in order to show 
+\item{scale}{Converts the graphs into density plots in order to show
 relative distributions.}
 
 \item{group.by}{The variable to use for grouping}
@@ -38,29 +38,29 @@ to plot groups in order}
 \item{exportTable}{Returns the data frame used for forming the graph
 to the visualization.}
 
-\item{palette}{Colors to use in visualization - input any 
+\item{palette}{Colors to use in visualization - input any
 \link[grDevices]{hcl.pals}.}
 }
 \value{
-ggplot of the total or relative abundance of clones 
+ggplot of the total or relative abundance of clones
 across quanta
 }
 \description{
-Displays the number of clones at specific frequencies by sample 
+Displays the number of clones at specific frequencies by sample
 or group. Visualization can either be a line graph (
-\strong{scale} = FALSE) using calculated numbers or density 
-plot (\strong{scale} = TRUE). Multiple sequencing runs can 
-be group together using the group parameter. If a matrix 
-output for the data is preferred, set 
+\strong{scale} = FALSE) using calculated numbers or density
+plot (\strong{scale} = TRUE). Multiple sequencing runs can
+be group together using the group parameter. If a matrix
+output for the data is preferred, set
 \strong{exportTable} = TRUE.
 }
 \examples{
 #Making combined contig data
-combined <- combineTCR(contig_list, 
-                        samples = c("P17B", "P17L", "P18B", "P18L", 
+combined <- combineTCR(contig_list,
+                        samples = c("P17B", "P17L", "P18B", "P18L",
                                     "P19B","P19L", "P20B", "P20L"))
-clonalAbundance(combined, 
-                cloneCall = "gene", 
+clonalAbundance(combined,
+                cloneCall = "gene",
                 scale = FALSE)
 
 }
diff --git a/man/clonalSizeDistribution.Rd b/man/clonalSizeDistribution.Rd
index 2d34bb37..5c400479 100644
--- a/man/clonalSizeDistribution.Rd
+++ b/man/clonalSizeDistribution.Rd
@@ -16,36 +16,36 @@ clonalSizeDistribution(
 )
 }
 \arguments{
-\item{input.data}{The product of \code{\link{combineTCR}}, 
+\item{input.data}{The product of \code{\link{combineTCR}},
 \code{\link{combineBCR}}, or \code{\link{combineExpression}}.}
 
-\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), 
+\item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}),
 CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}),
-VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable 
+VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable
 in the data.}
 
-\item{chain}{indicate if both or a specific chain should be used - 
+\item{chain}{indicate if both or a specific chain should be used -
 e.g. "both", "TRA", "TRG", "IGH", "IGL".}
 
 \item{method}{The clustering parameter for the dendrogram.}
 
-\item{threshold}{Numerical vector containing the thresholds 
+\item{threshold}{Numerical vector containing the thresholds
 the grid search was performed over.}
 
 \item{group.by}{The variable to use for grouping.}
 
 \item{exportTable}{Returns the data frame used for forming the graph.}
 
-\item{palette}{Colors to use in visualization - input any 
+\item{palette}{Colors to use in visualization - input any
 \link[grDevices]{hcl.pals}.}
 }
 \value{
 ggplot dendrogram of the clone size distribution
 }
 \description{
-This function produces a hierarchical clustering of clones by sample 
-using discrete gamma-GPD spliced threshold model. If using this 
-model please read and cite powerTCR (more info available at 
+This function produces a hierarchical clustering of clones by sample
+using discrete gamma-GPD spliced threshold model. If using this
+model please read and cite powerTCR (more info available at
 \href{https://pubmed.ncbi.nlm.nih.gov/30485278/}{PMID: 30485278}).
 }
 \details{
@@ -59,7 +59,7 @@ Where:
   \item{\eqn{\xi} is a shape parameter}
   \item{\eqn{x \ge \mu} if \eqn{\xi \ge 0} and \eqn{\mu \le x \le \mu - \sigma/\xi} if \eqn{\xi < 0}}
 }
-              
+
 The probability density function (pdf) for the \strong{Gamma Distribution} is given by:
 \deqn{f(x|\alpha, \beta) = \frac{x^{\alpha-1} e^{-x/\beta}}{\beta^\alpha \Gamma(\alpha)}}
 
@@ -73,8 +73,8 @@ Where:
 }
 \examples{
 #Making combined contig data
-combined <- combineTCR(contig_list, 
-                        samples = c("P17B", "P17L", "P18B", "P18L", 
+combined <- combineTCR(contig_list,
+                        samples = c("P17B", "P17L", "P18B", "P18L",
                                     "P19B","P19L", "P20B", "P20L"))
 clonalSizeDistribution(combined, cloneCall = "strict", method="ward.D2")
 
diff --git a/man/percentAA.Rd b/man/percentAA.Rd
index eac3893d..27703a41 100644
--- a/man/percentAA.Rd
+++ b/man/percentAA.Rd
@@ -35,16 +35,16 @@ to plot groups in order}
 ggplot of stacked bar graphs of amino acid proportions
 }
 \description{
-This function the proportion of amino acids along the residues 
+This function the proportion of amino acids along the residues
 of the CDR3 amino acid sequence.
 }
 \examples{
 #Making combined contig data
-combined <- combineTCR(contig_list, 
-                        samples = c("P17B", "P17L", "P18B", "P18L", 
+combined <- combineTCR(contig_list,
+                        samples = c("P17B", "P17L", "P18B", "P18L",
                                     "P19B","P19L", "P20B", "P20L"))
-percentAA(combined, 
-          chain = "TRB", 
+percentAA(combined,
+          chain = "TRB",
           aa.length = 20)
 }
 \concept{Summarize_Repertoire}
diff --git a/man/positionalEntropy.Rd b/man/positionalEntropy.Rd
index 0b79ca92..ed74563e 100644
--- a/man/positionalEntropy.Rd
+++ b/man/positionalEntropy.Rd
@@ -16,7 +16,7 @@ positionalEntropy(
 )
 }
 \arguments{
-\item{input.data}{The product of \code{\link{combineTCR}}, 
+\item{input.data}{The product of \code{\link{combineTCR}},
 \code{\link{combineBCR}}, or \code{\link{combineExpression}}}
 
 \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL"}
@@ -28,7 +28,7 @@ to plot groups in order}
 
 \item{aa.length}{The maximum length of the CDR3 amino acid sequence.}
 
-\item{method}{The method to calculate the entropy/diversity - 
+\item{method}{The method to calculate the entropy/diversity -
 "shannon", "inv.simpson", "norm.entropy"}
 
 \item{exportTable}{Returns the data frame used for forming the graph}
@@ -39,20 +39,20 @@ to plot groups in order}
 ggplot of line graph of diversity by position
 }
 \description{
-This function the diversity amino acids along the residues 
-of the CDR3 amino acid sequence. Please see 
-\code{\link{clonalDiversity}} for more information on 
-the underlying methods for diversity/entropy calculations. 
-Positions without variance will have a value reported as 0 
+This function the diversity amino acids along the residues
+of the CDR3 amino acid sequence. Please see
+\code{\link{clonalDiversity}} for more information on
+the underlying methods for diversity/entropy calculations.
+Positions without variance will have a value reported as 0
 for the purposes of comparison.
 }
 \examples{
 #Making combined contig data
-combined <- combineTCR(contig_list, 
-                        samples = c("P17B", "P17L", "P18B", "P18L", 
+combined <- combineTCR(contig_list,
+                        samples = c("P17B", "P17L", "P18B", "P18L",
                                     "P19B","P19L", "P20B", "P20L"))
-positionalEntropy(combined, 
-                  chain = "TRB", 
+positionalEntropy(combined,
+                  chain = "TRB",
                   aa.length = 20)
 }
 \concept{Summarize_Repertoire}
diff --git a/man/positionalProperty.Rd b/man/positionalProperty.Rd
index 787c33c3..d769b33c 100644
--- a/man/positionalProperty.Rd
+++ b/man/positionalProperty.Rd
@@ -16,7 +16,7 @@ positionalProperty(
 )
 }
 \arguments{
-\item{input.data}{The product of \code{\link{combineTCR}}, 
+\item{input.data}{The product of \code{\link{combineTCR}},
 \code{\link{combineBCR}}, or \code{\link{combineExpression}}}
 
 \item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL"}
@@ -39,9 +39,9 @@ to plot groups in order}
 ggplot of line graph of diversity by position
 }
 \description{
-This function calculates the mean selected property for 
-amino acids along the residues of the CDR3 amino acid sequence. 
-The ribbon surrounding the individual line represents the 95% 
+This function calculates the mean selected property for
+amino acids along the residues of the CDR3 amino acid sequence.
+The ribbon surrounding the individual line represents the 95%
 confidence interval.
 }
 \details{
@@ -59,12 +59,12 @@ More information for the individual methods can be found at the following citati
 }
 \examples{
 #Making combined contig data
-combined <- combineTCR(contig_list, 
-                        samples = c("P17B", "P17L", "P18B", "P18L", 
+combined <- combineTCR(contig_list,
+                        samples = c("P17B", "P17L", "P18B", "P18L",
                                     "P19B","P19L", "P20B", "P20L"))
-positionalProperty(combined, 
+positionalProperty(combined,
                    chain = "TRB",
-                   method = "Atchley", 
+                   method = "Atchley",
                    aa.length = 20)
 }
 \author{
diff --git a/man/vizGenes.Rd b/man/vizGenes.Rd
index 12cfeabc..5c9b8346 100644
--- a/man/vizGenes.Rd
+++ b/man/vizGenes.Rd
@@ -17,47 +17,47 @@ vizGenes(
 )
 }
 \arguments{
-\item{input.data}{The product of \code{\link{combineTCR}}, 
+\item{input.data}{The product of \code{\link{combineTCR}},
 \code{\link{combineBCR}}, or \code{\link{combineExpression}}.}
 
-\item{x.axis}{Gene segments to separate the x-axis, such as "TRAV", 
+\item{x.axis}{Gene segments to separate the x-axis, such as "TRAV",
 "TRBD", "IGKJ".}
 
-\item{y.axis}{Variable to separate the y-axis, can be both categorical 
+\item{y.axis}{Variable to separate the y-axis, can be both categorical
 or other gene gene segments, such as "TRAV", "TRBD", "IGKJ".}
 
 \item{group.by}{Variable in which to group the diversity calculation.}
 
 \item{plot}{The type of plot to return - heatmap or barplot.}
 
-\item{order}{Categorical variable to organize the x-axis, either 
+\item{order}{Categorical variable to organize the x-axis, either
 "gene" or "variance"}
 
-\item{scale}{Converts the individual count of genes to proportion using 
+\item{scale}{Converts the individual count of genes to proportion using
 the total respective repertoire size}
 
 \item{exportTable}{Returns the data frame used for forming the graph.}
 
-\item{palette}{Colors to use in visualization - input any 
+\item{palette}{Colors to use in visualization - input any
 \link[grDevices]{hcl.pals}.}
 }
 \value{
 ggplot bar diagram or heatmap of gene usage
 }
 \description{
-This function will allow for the visualizing the distribution 
+This function will allow for the visualizing the distribution
 of the any VDJ and C gene of the TCR or BCR using heatmap or
-bar chart. This function requires assumes two chains were used in 
-defining clone, if not, it will default to the only chain 
+bar chart. This function requires assumes two chains were used in
+defining clone, if not, it will default to the only chain
 present regardless of the chain parameter.
 }
 \examples{
 #Making combined contig data
-combined <- combineTCR(contig_list, 
-                        samples = c("P17B", "P17L", "P18B", "P18L", 
+combined <- combineTCR(contig_list,
+                        samples = c("P17B", "P17L", "P18B", "P18L",
                                     "P19B","P19L", "P20B", "P20L"))
 
-vizGenes(combined, 
+vizGenes(combined,
          x.axis = "TRBV",
          y.axis = NULL,
          plot = "heatmap")

From 2563a640e28d81091bb4b50e1e7792975b42775b Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 16:39:12 -0700
Subject: [PATCH 03/11] re-format & document & refactor loadContigs

---
 R/loadContigs.R    | 110 ++++++++++++++++++++++++++-------------------
 man/loadContigs.Rd |  27 +++++------
 2 files changed, 78 insertions(+), 59 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index 06119d87..5da719f6 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -32,61 +32,79 @@
 #'
 #' @param input The directory in which contigs are located or a list with contig elements
 #' @param format The format of the single-cell contig, currently supporting:
-#' "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope", "TRUST4", and "WAT3R"
+#' "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope",
+#' "TRUST4", "WAT3R", and "Immcantation"
 #' @importFrom utils read.csv read.delim
 #' @importFrom rjson fromJSON
 #' @export
 #' @concept Loading_and_Processing_Contigs
 #' @return List of contigs for compatibility  with \code{\link{combineTCR}} or
 #' \code{\link{combineBCR}}
-loadContigs <- function(input,
-                        format = "10X") {
-  #Loading from directory, recursively
-  if (inherits(x=input, what ="character")) {
-    format.list <- list("WAT3R" = "barcode_results.csv",
-                        "10X" =  "filtered_contig_annotations.csv",
-                        "AIRR" = "airr_rearrangement.tsv",
-                        "Dandelion" = "all_contig_dandelion.tsv",
-                        "Immcantation" = "_data.tsv",
-                        "MiXCR" = "clones.tsv",
-                        "JSON" = ".json",
-                        "TRUST4" = "barcode_report.tsv",
-                        "BD" = "Contigs_AIRR.tsv",
-                        "Omniscope" =c("_OSB.csv", "_OST.csv"),
-                        "ParseBio" = "barcode_report.tsv")
+loadContigs <- function(input, format = "10X") {
+
+    assert_that(is.string(input) || is.list(input) || is.data.frame(input))
+    assert_that(is.string(format))
+    assert_that(format %in% c(
+        "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio",
+        "Omniscope", "TRUST4", "WAT3R", "Immcantation"
+    ))
+
+    #Loading from directory, recursively
+    df <- if (inherits(x = input, what = "character")) {
+
+        format.list <- list("WAT3R" = "barcode_results.csv",
+                            "10X" =  "filtered_contig_annotations.csv",
+                            "AIRR" = "airr_rearrangement.tsv",
+                            "Dandelion" = "all_contig_dandelion.tsv",
+                            "Immcantation" = "_data.tsv",
+                            "MiXCR" = "clones.tsv",
+                            "JSON" = ".json",
+                            "TRUST4" = "barcode_report.tsv",
+                            "BD" = "Contigs_AIRR.tsv",
+                            "Omniscope" =c("_OSB.csv", "_OST.csv"),
+                            "ParseBio" = "barcode_report.tsv")
         file.pattern <- format.list[[format]]
-        contig.files <- list.files(input, paste0(file.pattern, collapse = "|"), recursive = TRUE, full.names = TRUE)
-       
-        if (format %in% c("10X", "WAT3R", "Omniscope")) {
-          df <- lapply(contig.files, read.csv)
-        } else if(format %in% c("json")) {
-          df <- lapply(contig.files, function(x) {
-            tmp <- as.data.frame(fromJSON(x))
-          })
+        contig.files <- list.files(
+            input,
+            paste0(file.pattern, collapse = "|"),
+            recursive = TRUE,
+            full.names = TRUE
+        )
+
+        if (length(contig.files) == 0) {
+            warning("No files found in the directory")
+            return(list())
+        }
+
+        reader <- if (format == "JSON") { # must match the validated name
+            function(x) as.data.frame(fromJSON(x))
+        } else if (format %in% c("10X", "WAT3R", "Omniscope")) {
+            read.csv
         } else {
-          df <- lapply(contig.files, read.delim)
+            read.delim
         }
-  #Already loaded list or data frame
-  } else if (inherits(x=input, what ="list") | inherits(x=input, what ="data.frame")) {
-    df <- .checkList(input)
-  }
- 
-  loadFunc <- switch(format,
-                     "10X" = .parse10x,
-                     "AIRR" = .parseAIRR,
-                     "Dandelion" = .parseDandelion,
-                     "JSON" = .parseJSON,
-                     "MiXCR" = .parseMiXCR,
-                     "TRUST4" = .parseTRUST4,
-                     "BD" = .parseBD,
-                     "WAT3R"  = .parseWAT3R,
-                     "Omniscope" = .parseOmniscope,
-                     "Immcantation" = .parseImmcantation,
-                     "ParseBio" = .parseParse,
-                      stop("Invalid format provided"))
- 
-  df <- loadFunc(df)
-  return(df)
+
+        lapply(contig.files, reader)
+
+    } else { # handle an already loaded list of dfs / 1 df
+        .checkList(input)
+    }
+
+    loadFunc <- switch(format,
+        "10X" = .parse10x,
+        "AIRR" = .parseAIRR,
+        "Dandelion" = .parseDandelion,
+        "JSON" = .parseJSON,
+        "MiXCR" = .parseMiXCR,
+        "TRUST4" = .parseTRUST4,
+        "BD" = .parseBD,
+        "WAT3R"  = .parseWAT3R,
+        "Omniscope" = .parseOmniscope,
+        "Immcantation" = .parseImmcantation,
+        "ParseBio" = .parseParse
+    )
+
+    loadFunc(df)
 }
 
 #Formats TRUST4 data
diff --git a/man/loadContigs.Rd b/man/loadContigs.Rd
index a96001be..186be719 100644
--- a/man/loadContigs.Rd
+++ b/man/loadContigs.Rd
@@ -9,33 +9,34 @@ loadContigs(input, format = "10X")
 \arguments{
 \item{input}{The directory in which contigs are located or a list with contig elements}
 
-\item{format}{The format of the single-cell contig, currently supporting: 
-"10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope", "TRUST4", and "WAT3R"}
+\item{format}{The format of the single-cell contig, currently supporting:
+"10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope",
+"TRUST4", "WAT3R", and "Immcantation"}
 }
 \value{
-List of contigs for compatibility  with \code{\link{combineTCR}} or 
+List of contigs for compatibility  with \code{\link{combineTCR}} or
 \code{\link{combineBCR}}
 }
 \description{
-This function generates a contig list and formats the data to allow for 
-function with  \code{\link{combineTCR}} or \code{\link{combineBCR}}. If 
-using data derived from filtered outputs of 10X Genomics, there is no 
+This function generates a contig list and formats the data to allow for
+function with  \code{\link{combineTCR}} or \code{\link{combineBCR}}. If
+using data derived from filtered outputs of 10X Genomics, there is no
 need to use this function as the data is already compatible.
 }
 \details{
-The files that this function parses includes:  
+The files that this function parses includes: 
 \itemize{
   \item 10X =  "filtered_contig_annotations.csv"
-  \item AIRR = "airr_rearrangement.tsv" 
-  \item BD = "Contigs_AIRR.tsv" 
-  \item Dandelion = "all_contig_dandelion.tsv" 
-  \item Immcantation = "data.tsv" 
+  \item AIRR = "airr_rearrangement.tsv"
+  \item BD = "Contigs_AIRR.tsv"
+  \item Dandelion = "all_contig_dandelion.tsv"
+  \item Immcantation = "data.tsv"
   \item JSON = ".json"
   \item ParseBio = "barcode_report.tsv"
   \item MiXCR = "clones.tsv"
-  \item Omniscope = ".csv" 
+  \item Omniscope = ".csv"
   \item TRUST4 = "barcode_report.tsv"
-  \item WAT3R = "barcode_results.csv" 
+  \item WAT3R = "barcode_results.csv"
 }
 }
 \examples{

From 88e6c9d7e82f3d2a1460176c83e07faefc593fec Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 16:47:37 -0700
Subject: [PATCH 04/11] slight improvement to loadContigs regex, pinpoint 429
 cause

---
 R/loadContigs.R | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index 5da719f6..2423fde0 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -52,21 +52,23 @@ loadContigs <- function(input, format = "10X") {
     #Loading from directory, recursively
     df <- if (inherits(x = input, what = "character")) {
 
-        format.list <- list("WAT3R" = "barcode_results.csv",
-                            "10X" =  "filtered_contig_annotations.csv",
-                            "AIRR" = "airr_rearrangement.tsv",
-                            "Dandelion" = "all_contig_dandelion.tsv",
-                            "Immcantation" = "_data.tsv",
-                            "MiXCR" = "clones.tsv",
-                            "JSON" = ".json",
-                            "TRUST4" = "barcode_report.tsv",
-                            "BD" = "Contigs_AIRR.tsv",
-                            "Omniscope" =c("_OSB.csv", "_OST.csv"),
-                            "ParseBio" = "barcode_report.tsv")
+        format.list <- list(
+            "WAT3R" = "barcode_results.csv",
+            "10X" =  "filtered_contig_annotations.csv",
+            "AIRR" = "airr_rearrangement.tsv",
+            "Dandelion" = "all_contig_dandelion.tsv",
+            "Immcantation" = "_data.tsv",
+            "MiXCR" = "clones.tsv",
+            "JSON" = ".json",
+            "TRUST4" = "barcode_report.tsv",
+            "BD" = "Contigs_AIRR.tsv",
+            "Omniscope" = c("_OSB.csv", "_OST.csv"),
+            "ParseBio" = "barcode_report.tsv"
+        )
         file.pattern <- format.list[[format]]
         contig.files <- list.files(
             input,
-            paste0(file.pattern, collapse = "|"),
+            paste0("*", file.pattern, "$", collapse = "|"),
             recursive = TRUE,
             full.names = TRUE
         )
@@ -129,7 +131,7 @@ loadContigs <- function(input, format = "10X") {
           chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[,seq_len(7)]
           chain1[chain1 == "*"] <- "None"
         }
-        colnames(chain1) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
+        colnames(chain1) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads") # issue 429
         chain1 <- data.frame(barcode = df[[i]][,1], chain1)
         data2 <- rbind(chain1, chain2)
         data2[data2 == ""] <- NA

From a51807bb88172f520e64d1c51761037b0d09c110 Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 16:51:13 -0700
Subject: [PATCH 05/11] rename loadContig internal var

---
 R/loadContigs.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index 2423fde0..aad79b7d 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -50,7 +50,7 @@ loadContigs <- function(input, format = "10X") {
     ))
 
     #Loading from directory, recursively
-    df <- if (inherits(x = input, what = "character")) {
+    rawDataDfList <- if (inherits(x = input, what = "character")) {
 
         format.list <- list(
             "WAT3R" = "barcode_results.csv",
@@ -106,7 +106,7 @@ loadContigs <- function(input, format = "10X") {
         "ParseBio" = .parseParse
     )
 
-    loadFunc(df)
+    loadFunc(rawDataDfList)
 }
 
 #Formats TRUST4 data

From 3fdfc95ec08c074639874c67d5e5db5658ea24bc Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 17:03:11 -0700
Subject: [PATCH 06/11] reformat TRUST4 parser

---
 R/loadContigs.R | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index aad79b7d..53b66b02 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -109,30 +109,30 @@ loadContigs <- function(input, format = "10X") {
     loadFunc(rawDataDfList)
 }
 
-#Formats TRUST4 data
+#' Formats TRUST4 data
 #' @importFrom stringr str_split
 .parseTRUST4 <- function(df) {
     for (i in seq_along(df)) {
         colnames(df[[i]])[1] <- "barcode"
         df[[i]][df[[i]] == "*"] <- NA
-       
+
         if(length(which(is.na(df[[i]]$chain1))) == length(df[[i]]$chain1)) {
-          chain2 <- matrix(ncol = 7, nrow = length(df[[i]]$chain1))
+            chain2 <- matrix(ncol = 7, nrow = length(df[[i]]$chain1))
         } else {
-          chain2 <- str_split(df[[i]]$chain1, ",", simplify = TRUE)[,seq_len(7)]
-          chain2[chain2 == "*"] <- "None"
+            chain2 <- str_split(df[[i]]$chain1, ",", simplify = TRUE)[, seq_len(7)]
+            chain2[chain2 == "*"] <- "None"
         }
         colnames(chain2) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
-        chain2 <- data.frame(barcode = df[[i]][,1], chain2)
-       
+        chain2 <- data.frame(barcode = df[[i]][, 1], chain2)
+
         if(length(which(is.na(df[[i]]$chain2))) == length(df[[i]]$chain2)) {
-          chain1 <- matrix(ncol = 7, nrow = length(df[[i]]$chain2))
+            chain1 <- matrix(ncol = 7, nrow = length(df[[i]]$chain2))
         } else {
-          chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[,seq_len(7)]
-          chain1[chain1 == "*"] <- "None"
+            chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[, seq_len(7)]
+            chain1[chain1 == "*"] <- "None"
         }
         colnames(chain1) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads") # issue 429
-        chain1 <- data.frame(barcode = df[[i]][,1], chain1)
+        chain1 <- data.frame(barcode = df[[i]][, 1], chain1)
         data2 <- rbind(chain1, chain2)
         data2[data2 == ""] <- NA
         df[[i]] <- data2

From bd77186f456b4a17373dd6d286ff8778bccb7b02 Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 17:08:15 -0700
Subject: [PATCH 07/11] more reformatting of TRUST4

---
 R/loadContigs.R | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index 53b66b02..3513159e 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -5,7 +5,7 @@
 #' using data derived from filtered outputs of 10X Genomics, there is no
 #' need to use this function as the data is already compatible.
 #'
-#' The files that this function parses includes: 
+#' The files that this function parses includes:
 #' \itemize{
 #'   \item 10X =  "filtered_contig_annotations.csv"
 #'   \item AIRR = "airr_rearrangement.tsv"
@@ -30,7 +30,8 @@
 #' WAT3R <- read.csv("https://www.borch.dev/uploads/contigs/WAT3R_contigs.csv")
 #' contig.list <- loadContigs(WAT3R, format = "WAT3R")
 #'
-#' @param input The directory in which contigs are located or a list with contig elements
+#' @param input The directory in which contigs are located or a list with contig
+#' elements
 #' @param format The format of the single-cell contig, currently supporting:
 #' "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope",
 #' "TRUST4", "WAT3R", and "Immcantation"
@@ -112,11 +113,13 @@ loadContigs <- function(input, format = "10X") {
 #' Formats TRUST4 data
 #' @importFrom stringr str_split
 .parseTRUST4 <- function(df) {
+
     for (i in seq_along(df)) {
+
         colnames(df[[i]])[1] <- "barcode"
         df[[i]][df[[i]] == "*"] <- NA
 
-        if(length(which(is.na(df[[i]]$chain1))) == length(df[[i]]$chain1)) {
+        if (length(which(is.na(df[[i]]$chain1))) == length(df[[i]]$chain1)) {
             chain2 <- matrix(ncol = 7, nrow = length(df[[i]]$chain1))
         } else {
             chain2 <- str_split(df[[i]]$chain1, ",", simplify = TRUE)[, seq_len(7)]
@@ -125,7 +128,7 @@ loadContigs <- function(input, format = "10X") {
         colnames(chain2) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
         chain2 <- data.frame(barcode = df[[i]][, 1], chain2)
 
-        if(length(which(is.na(df[[i]]$chain2))) == length(df[[i]]$chain2)) {
+        if (length(which(is.na(df[[i]]$chain2))) == length(df[[i]]$chain2)) {
             chain1 <- matrix(ncol = 7, nrow = length(df[[i]]$chain2))
         } else {
             chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[, seq_len(7)]
@@ -137,8 +140,16 @@ loadContigs <- function(input, format = "10X") {
         data2[data2 == ""] <- NA
         df[[i]] <- data2
     }
-    df <- .chain.parser(df)
-    return(df)
+
+    .chain.parser(df)
+}
+
+#Grabs the chain info from v_gene
+.chain.parser <- function(df) {
+    lapply(df, function(x) {
+        x$chain <- substr(x$v_gene, 1, 3)
+        x
+    })
 }
 
 #Formats wat3r data
@@ -204,14 +215,6 @@ loadContigs <- function(input, format = "10X") {
   }
   return(df)
 }
-#Grabs the chain info from v_gene
-.chain.parser <- function(df) {
-  for (i in seq_along(df)) {
-    df[[i]]$chain <- substr(df[[i]][,"v_gene"],1,3)
-  }
-  return(df)
-}
-   
 
 .parseOmniscope <- function(df) {
   for (i in seq_along(df)) {

From df2515ed8b1fd57c00ac634e8f1e1927bfd14661 Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 17:26:49 -0700
Subject: [PATCH 08/11] fix .parseTRUST4 for 1 row edgecase

---
 R/loadContigs.R    | 10 +++++-----
 man/loadContigs.Rd |  5 +++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index 3513159e..c71bd541 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -110,7 +110,7 @@ loadContigs <- function(input, format = "10X") {
     loadFunc(rawDataDfList)
 }
 
-#' Formats TRUST4 data
+#Formats TRUST4 data
 #' @importFrom stringr str_split
 .parseTRUST4 <- function(df) {
 
@@ -122,7 +122,7 @@ loadContigs <- function(input, format = "10X") {
         if (length(which(is.na(df[[i]]$chain1))) == length(df[[i]]$chain1)) {
             chain2 <- matrix(ncol = 7, nrow = length(df[[i]]$chain1))
         } else {
-            chain2 <- str_split(df[[i]]$chain1, ",", simplify = TRUE)[, seq_len(7)]
+            chain2 <- str_split(df[[i]]$chain1, ",", simplify = TRUE)[, seq_len(7), drop = FALSE]
             chain2[chain2 == "*"] <- "None"
         }
         colnames(chain2) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
@@ -131,14 +131,14 @@ loadContigs <- function(input, format = "10X") {
         if (length(which(is.na(df[[i]]$chain2))) == length(df[[i]]$chain2)) {
             chain1 <- matrix(ncol = 7, nrow = length(df[[i]]$chain2))
         } else {
-            chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[, seq_len(7)]
+            chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[, seq_len(7), drop = FALSE]
             chain1[chain1 == "*"] <- "None"
         }
-        colnames(chain1) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads") # issue 429
+        colnames(chain1) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
         chain1 <- data.frame(barcode = df[[i]][, 1], chain1)
         data2 <- rbind(chain1, chain2)
         data2[data2 == ""] <- NA
-        df[[i]] <- data2
+        df[[i]] <- data2 # is it necessary to drop rows that are fully NA with an existing barcode?
     }
 
     .chain.parser(df)
diff --git a/man/loadContigs.Rd b/man/loadContigs.Rd
index 186be719..8025e626 100644
--- a/man/loadContigs.Rd
+++ b/man/loadContigs.Rd
@@ -7,7 +7,8 @@
 loadContigs(input, format = "10X")
 }
 \arguments{
-\item{input}{The directory in which contigs are located or a list with contig elements}
+\item{input}{The directory in which contigs are located or a list with contig
+elements}
 
 \item{format}{The format of the single-cell contig, currently supporting:
 "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", "Omniscope",
@@ -24,7 +25,7 @@ using data derived from filtered outputs of 10X Genomics, there is no
 need to use this function as the data is already compatible.
 }
 \details{
-The files that this function parses includes: 
+The files that this function parses includes:
 \itemize{
   \item 10X =  "filtered_contig_annotations.csv"
   \item AIRR = "airr_rearrangement.tsv"

From 4e070946b3b22d09d2dd6e4042df1b9f90d40eda Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 17:27:18 -0700
Subject: [PATCH 09/11] add dev to PR

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 0ce19cc8..9e047d18 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main, master, v2]
   pull_request:
-    branches: [main, master, v2]
+    branches: [main, master, v2, dev]
 
 name: R-CMD-check
 

From af22d98bab8c21f61825e47547a11683b33d0636 Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 17:35:57 -0700
Subject: [PATCH 10/11] update NEWS

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 69719082..65bb7a8a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,7 @@
 * Removed unnecessary code remnant in ```clonalLength()```
 * Allow one sample to be plotted by ```percentVJ()```
 * Fixed issue with ```positionalProperty()``` and exportTable
+* Fixed issue with ```loadContigs()``` edge case when TRUST4 data has only 1 row.
 
 # scRepertoire VERSION 2.0.7
 

From c18c1d0f58b629442196177c893530b4d235bb76 Mon Sep 17 00:00:00 2001
From: Qile0317 <qile0317@gmail.com>
Date: Sun, 27 Oct 2024 17:45:51 -0700
Subject: [PATCH 11/11] refactor parseTRUST4

---
 R/loadContigs.R | 49 +++++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/R/loadContigs.R b/R/loadContigs.R
index c71bd541..354e7402 100644
--- a/R/loadContigs.R
+++ b/R/loadContigs.R
@@ -114,34 +114,35 @@ loadContigs <- function(input, format = "10X") {
 #' @importFrom stringr str_split
 .parseTRUST4 <- function(df) {
 
-    for (i in seq_along(df)) {
-
-        colnames(df[[i]])[1] <- "barcode"
-        df[[i]][df[[i]] == "*"] <- NA
-
-        if (length(which(is.na(df[[i]]$chain1))) == length(df[[i]]$chain1)) {
-            chain2 <- matrix(ncol = 7, nrow = length(df[[i]]$chain1))
+    processChain <- function(data, chain_col) {
+        if (all(is.na(data[[chain_col]]))) {
+            chain <- matrix(ncol = 7, nrow = length(data[[chain_col]]))
         } else {
-            chain2 <- str_split(df[[i]]$chain1, ",", simplify = TRUE)[, seq_len(7), drop = FALSE]
-            chain2[chain2 == "*"] <- "None"
+            chain <- str_split(data[[chain_col]], ",", simplify = TRUE)
+            chain <- chain[, seq_len(7), drop = FALSE]
+            chain[chain == "*"] <- "None"
         }
-        colnames(chain2) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
-        chain2 <- data.frame(barcode = df[[i]][, 1], chain2)
-
-        if (length(which(is.na(df[[i]]$chain2))) == length(df[[i]]$chain2)) {
-            chain1 <- matrix(ncol = 7, nrow = length(df[[i]]$chain2))
-        } else {
-            chain1 <- str_split(df[[i]]$chain2, ",", simplify = TRUE)[, seq_len(7), drop = FALSE]
-            chain1[chain1 == "*"] <- "None"
-        }
-        colnames(chain1) <- c("v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads")
-        chain1 <- data.frame(barcode = df[[i]][, 1], chain1)
-        data2 <- rbind(chain1, chain2)
-        data2[data2 == ""] <- NA
-        df[[i]] <- data2 # is it necessary to drop rows that are fully NA with an existing barcode?
+        colnames(chain) <- c(
+            "v_gene", "d_gene", "j_gene", "c_gene", "cdr3_nt", "cdr3", "reads"
+        )
+        data.frame(barcode = data$barcode, chain)
     }
 
-    .chain.parser(df)
+    formattedDfs <- lapply(df, function(data) {
+
+        colnames(data)[1] <- "barcode"
+        data[data == "*"] <- NA
+
+        # intentional: TRUST4 chain1/chain2 are scRepertoire chain2/chain1
+        chain1 <- processChain(data, "chain2")
+        chain2 <- processChain(data, "chain1")
+
+        combined_data <- rbind(chain1, chain2)
+        combined_data[combined_data == ""] <- NA
+        combined_data
+    })
+    # is it necessary to drop rows that are fully NA with an existing barcode?
+    .chain.parser(formattedDfs)
 }
 
 #Grabs the chain info from v_gene