diff --git a/R/loadContigs.R b/R/loadContigs.R index db2552c..304442a 100644 --- a/R/loadContigs.R +++ b/R/loadContigs.R @@ -1,24 +1,24 @@ #' Loading the contigs derived from single-cell sequencing #' +#' @description #' This function generates a contig list and formats the data to allow for #' function with [combineTCR()] or [combineBCR()]. If #' using data derived from filtered outputs of 10X Genomics, there is no #' need to use this function as the data is already compatible. #' #' The files that this function parses includes: -#' \itemize{ -#' \item 10X = "filtered_contig_annotations.csv" -#' \item AIRR = "airr_rearrangement.tsv" -#' \item BD = "Contigs_AIRR.tsv" -#' \item Dandelion = "all_contig_dandelion.tsv" -#' \item Immcantation = "data.tsv" -#' \item JSON = ".json" -#' \item ParseBio = "barcode_report.tsv" -#' \item MiXCR = "clones.tsv" -#' \item Omniscope = ".csv" -#' \item TRUST4 = "barcode_report.tsv" -#' \item WAT3R = "barcode_results.csv" -#' } +#' +#' - **10X**: `"filtered_contig_annotations.csv"` +#' - **AIRR**: `"airr_rearrangement.tsv"` +#' - **BD**: `"Contigs_AIRR.tsv"` +#' - **Dandelion**: `"all_contig_dandelion.tsv"` +#' - **Immcantation**: `"data.tsv"` +#' - **JSON**: `".json"` +#' - **ParseBio**: `"barcode_report.tsv"` +#' - **MiXCR**: `"clones.tsv"` +#' - **Omniscope**: `".csv"` +#' - **TRUST4**: `"barcode_report.tsv"` +#' - **WAT3R**: `"barcode_results.csv"` #' #' @examples #' TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv") @@ -40,15 +40,19 @@ #' @export #' @concept Loading_and_Processing_Contigs #' @return List of contigs for compatibility with [combineTCR()] or -#' [combineBCR()] +#' [combineBCR()]. Note that rows which are fully NA are dropped from the +#' final output. 
+#' loadContigs <- function(input, format = "10X") { - assert_that(is.string(input) || is.list(input) || is.data.frame(input)) - assert_that(is.string(format)) - assert_that(format %in% c( - "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", - "Omniscope", "TRUST4", "WAT3R", "Immcantation" - )) + assert_that( + is.string(input) || is.list(input) || is.data.frame(input), + is.string(format), + isIn(format, c( + "10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio", + "Omniscope", "TRUST4", "WAT3R", "Immcantation" + )) + ) #Loading from directory, recursively rawDataDfList <- if (inherits(x = input, what = "character")) { @@ -107,7 +111,15 @@ loadContigs <- function(input, format = "10X") { "ParseBio" = .parseParse ) - loadFunc(rawDataDfList) + rmAllNaRowsFromLoadContigs(loadFunc(rawDataDfList)) +} + +rmAllNaRowsFromLoadContigs <- function(dfList) { + lapply(dfList, function(x) { + # judge each data frame by its own non-barcode columns; an empty list passes through + cols <- setdiff(colnames(x), "barcode") + x[rowSums(!is.na(x[cols])) > 0, , drop = FALSE] + }) } #Formats TRUST4 data diff --git a/R/typecheck.R b/R/typecheck.R index bed534b..21a55d4 100644 --- a/R/typecheck.R +++ b/R/typecheck.R @@ -38,6 +38,10 @@ assertthat::on_failure(is_named_numeric) <- function(call, env) { # functions -assertthat::on_failure(`%in%`) <- function(call, env) { +isIn <- function(x, table) { + x %in% table +} + +assertthat::on_failure(isIn) <- function(call, env) { paste0(deparse(call$x), " is not in ", deparse(call$table)) } diff --git a/man/loadContigs.Rd b/man/loadContigs.Rd index 2d794d9..331a3e6 100644 --- a/man/loadContigs.Rd +++ b/man/loadContigs.Rd @@ -16,28 +16,28 @@ elements} } \value{ List of contigs for compatibility with \code{\link[=combineTCR]{combineTCR()}} or -\code{\link[=combineBCR]{combineBCR()}} +\code{\link[=combineBCR]{combineBCR()}}. Note that rows which are fully NA are dropped from the +final output. 
} \description{ This function generates a contig list and formats the data to allow for function with \code{\link[=combineTCR]{combineTCR()}} or \code{\link[=combineBCR]{combineBCR()}}. If using data derived from filtered outputs of 10X Genomics, there is no need to use this function as the data is already compatible. -} -\details{ + The files that this function parses includes: \itemize{ -\item 10X = "filtered_contig_annotations.csv" -\item AIRR = "airr_rearrangement.tsv" -\item BD = "Contigs_AIRR.tsv" -\item Dandelion = "all_contig_dandelion.tsv" -\item Immcantation = "data.tsv" -\item JSON = ".json" -\item ParseBio = "barcode_report.tsv" -\item MiXCR = "clones.tsv" -\item Omniscope = ".csv" -\item TRUST4 = "barcode_report.tsv" -\item WAT3R = "barcode_results.csv" +\item \strong{10X}: \code{"filtered_contig_annotations.csv"} +\item \strong{AIRR}: \code{"airr_rearrangement.tsv"} +\item \strong{BD}: \code{"Contigs_AIRR.tsv"} +\item \strong{Dandelion}: \code{"all_contig_dandelion.tsv"} +\item \strong{Immcantation}: \code{"data.tsv"} +\item \strong{JSON}: \code{".json"} +\item \strong{ParseBio}: \code{"barcode_report.tsv"} +\item \strong{MiXCR}: \code{"clones.tsv"} +\item \strong{Omniscope}: \code{".csv"} +\item \strong{TRUST4}: \code{"barcode_report.tsv"} +\item \strong{WAT3R}: \code{"barcode_results.csv"} } } \examples{ diff --git a/tests/testthat/test-loadContigs.R b/tests/testthat/test-loadContigs.R index 9d22378..90c68b5 100644 --- a/tests/testthat/test-loadContigs.R +++ b/tests/testthat/test-loadContigs.R @@ -1,62 +1,101 @@ # test script for loadContigs.R - testcases are NOT comprehensive! 
test_that("loadContigs works", { - TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv") - trial1 <- loadContigs(TRUST4, format = "TRUST4") - expect_identical(trial1, - getdata("load", "loadContigs_TRUST4") - ) - BD <- read.csv("https://www.borch.dev/uploads/contigs/BD_contigs.csv") trial2 <- loadContigs(BD, format = "BD") expect_identical(trial2, - getdata("load", "loadContigs_BD") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_BD")) ) WAT3R <- read.csv("https://www.borch.dev/uploads/contigs/WAT3R_contigs.csv") trial3 <- loadContigs(WAT3R, format = "WAT3R") expect_identical(trial3, - getdata("load", "loadContigs_WAT3R") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_WAT3R")) ) data("contig_list") trial4 <- loadContigs(contig_list[[1]], format = "10X") expect_identical(trial4, - getdata("load", "loadContigs_10x") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_10x")) ) MIXCR <- read.csv("https://www.borch.dev/uploads/contigs/MIXCR_contigs.csv") trial5 <- loadContigs(MIXCR, format = "MiXCR") expect_identical(trial5, - getdata("load", "loadContigs_MiXCR") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_MiXCR")) ) Immcantation <- read.csv("https://www.borch.dev/uploads/contigs/Immcantation_contigs.csv") trial6 <- loadContigs(Immcantation, format = "Immcantation") expect_identical(trial6, - getdata("load", "loadContigs_Immcantation") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Immcantation")) ) OS <- read.csv("https://www.borch.dev/uploads/contigs/OS_contigs2.csv") trial7 <- loadContigs(OS, format = "Omniscope") expect_identical(trial7, - getdata("load", "loadContigs_Omniscope") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Omniscope")) ) Parse <- read.csv("https://www.borch.dev/uploads/contigs/Parse_contigs.csv") trial8 <- loadContigs(Parse, format = "ParseBio") expect_identical(trial8, - getdata("load", "loadContigs_Parse") + rmAllNaRowsFromLoadContigs(getdata("load", 
"loadContigs_Parse")) ) Dandelion <- read.csv("https://www.borch.dev/uploads/contigs/Dandelion_contigs.csv") trial9 <- loadContigs(Dandelion, format = "Dandelion") expect_identical(trial9, - getdata("load", "loadContigs_Dandelion") + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Dandelion")) + ) +}) + +test_that("loadContigs(format='TRUST4') works", { + + TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv") + expect_identical( + loadContigs(TRUST4, format = "TRUST4"), + rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_TRUST4")) + ) + + oneRowTrust4Input <- structure( + list( + `#barcode` = "CGTAGCGGTGATAAGT-1", + cell_type = "B", + chain1 = "*", + chain2 = "IGKV1D-43,*,IGKJ1,IGKC,TGTCAACAGTATAGTAGTGTCCCCTGGACGTTC,CQQYSSVPWTF,6.00,CGTAGCGGTGATAAGT-1_2,76.00,0", + secondary_chain1 = "*", + secondary_chain2 = "*" + ), + row.names = c(NA, -1L), + class = "data.frame" + ) + + expectedParsedTrust4Data <- list( + structure( + list( + barcode = "CGTAGCGGTGATAAGT-1", + v_gene = "IGKV1D-43", + d_gene = "None", + j_gene = "IGKJ1", + c_gene = "IGKC", + cdr3_nt = "TGTCAACAGTATAGTAGTGTCCCCTGGACGTTC", + cdr3 = "CQQYSSVPWTF", + reads = "6.00", + chain = "IGK" + ), + row.names = 1L, + class = "data.frame" + ) + ) + + expect_identical( + loadContigs(oneRowTrust4Input, format = "TRUST4"), + expectedParsedTrust4Data ) -}) +}) # TODO Add tests for .json and AIRR # TODO Would be nice to have a dir option