Skip to content

Commit

Permalink
drop all NA rows from loadContigs output
Browse files Browse the repository at this point in the history
  • Loading branch information
Qile0317 committed Nov 8, 2024
1 parent bda1cb5 commit 19cabb6
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 51 deletions.
54 changes: 33 additions & 21 deletions R/loadContigs.R
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
#' Loading the contigs derived from single-cell sequencing
#'
#' @description
#' This function generates a contig list and formats the data for use
#' with [combineTCR()] or [combineBCR()]. If
#' using data derived from filtered outputs of 10X Genomics, there is no
#' need to use this function as the data is already compatible.
#'
#' The files that this function parses include:
#' \itemize{
#' \item 10X = "filtered_contig_annotations.csv"
#' \item AIRR = "airr_rearrangement.tsv"
#' \item BD = "Contigs_AIRR.tsv"
#' \item Dandelion = "all_contig_dandelion.tsv"
#' \item Immcantation = "data.tsv"
#' \item JSON = ".json"
#' \item ParseBio = "barcode_report.tsv"
#' \item MiXCR = "clones.tsv"
#' \item Omniscope = ".csv"
#' \item TRUST4 = "barcode_report.tsv"
#' \item WAT3R = "barcode_results.csv"
#' }
#'
#' - **10X**: `"filtered_contig_annotations.csv"`
#' - **AIRR**: `"airr_rearrangement.tsv"`
#' - **BD**: `"Contigs_AIRR.tsv"`
#' - **Dandelion**: `"all_contig_dandelion.tsv"`
#' - **Immcantation**: `"data.tsv"`
#' - **JSON**: `".json"`
#' - **ParseBio**: `"barcode_report.tsv"`
#' - **MiXCR**: `"clones.tsv"`
#' - **Omniscope**: `".csv"`
#' - **TRUST4**: `"barcode_report.tsv"`
#' - **WAT3R**: `"barcode_results.csv"`
#'
#' @examples
#' TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv")
Expand All @@ -40,15 +40,19 @@
#' @export
#' @concept Loading_and_Processing_Contigs
#' @return List of contigs for compatibility with [combineTCR()] or
#' [combineBCR()]
#' [combineBCR()]. Note that rows which are fully NA are dropped from the
#' final output.
#'
loadContigs <- function(input, format = "10X") {

assert_that(is.string(input) || is.list(input) || is.data.frame(input))
assert_that(is.string(format))
assert_that(format %in% c(
"10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio",
"Omniscope", "TRUST4", "WAT3R", "Immcantation"
))
assert_that(
is.string(input) || is.list(input) || is.data.frame(input),
is.string(format),
isIn(format, c(
"10X", "AIRR", "BD", "Dandelion", "JSON", "MiXCR", "ParseBio",
"Omniscope", "TRUST4", "WAT3R", "Immcantation"
))
)

#Loading from directory, recursively
rawDataDfList <- if (inherits(x = input, what = "character")) {
Expand Down Expand Up @@ -107,7 +111,15 @@ loadContigs <- function(input, format = "10X") {
"ParseBio" = .parseParse
)

loadFunc(rawDataDfList)
rmAllNaRowsFromLoadContigs(loadFunc(rawDataDfList))
}

# Drop uninformative rows from every data frame in a list.
#
# A row is removed when all of its columns other than "barcode" are NA.
# The set of columns to inspect is taken from the first data frame and
# applied to every element of the list.
#
# @param dfList A list of data frames.
# @return A list of the same length and names as `dfList`, where each
#   element is a data frame containing only the rows with at least one
#   non-NA value outside the "barcode" column.
rmAllNaRowsFromLoadContigs <- function(dfList) {
    # An empty list has no first element to take column names from;
    # `dfList[[1]]` would fail with "subscript out of bounds".
    if (length(dfList) == 0L) {
        return(dfList)
    }

    firstCols <- colnames(dfList[[1]])
    valueCols <- firstCols[firstCols != "barcode"]

    lapply(dfList, function(x) {
        hasData <- rowSums(!is.na(x[valueCols])) > 0
        # drop = FALSE keeps a data frame even when `x` has a single column;
        # without it the result collapses to a bare vector.
        x[hasData, , drop = FALSE]
    })
}

#Formats TRUST4 data
Expand Down
6 changes: 5 additions & 1 deletion R/typecheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ assertthat::on_failure(is_named_numeric) <- function(call, env) {

# functions

assertthat::on_failure(`%in%`) <- function(call, env) {
# Named wrapper around `%in%` so a custom assertthat failure message can be
# registered on it.
#
# @param x Values to look up.
# @param table Values to match against.
# @return A logical vector the same length as `x`; TRUE where the element
#   of `x` occurs in `table`.
isIn <- function(x, table) {
    # Same definition base R uses for `%in%`.
    match(x, table, nomatch = 0L) > 0L
}

# Failure message reported when an `isIn()` assertion is FALSE.
#
# `call$x` and `call$table` are the unevaluated argument expressions of the
# failing `isIn()` call, so the message shows the code as written rather
# than the evaluated values.
assertthat::on_failure(isIn) <- function(call, env) {
    # deparse() splits long expressions (e.g. a long `c(...)` of allowed
    # values) into several strings; join them so that exactly one message
    # string is returned.
    deparseToString <- function(expr) {
        paste(trimws(deparse(expr)), collapse = " ")
    }
    paste0(
        deparseToString(call$x), " is not in ", deparseToString(call$table)
    )
}
28 changes: 14 additions & 14 deletions man/loadContigs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

69 changes: 54 additions & 15 deletions tests/testthat/test-loadContigs.R
Original file line number Diff line number Diff line change
@@ -1,62 +1,101 @@
# test script for loadContigs.R - testcases are NOT comprehensive!

test_that("loadContigs works", {
TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv")
trial1 <- loadContigs(TRUST4, format = "TRUST4")
expect_identical(trial1,
getdata("load", "loadContigs_TRUST4")
)


BD <- read.csv("https://www.borch.dev/uploads/contigs/BD_contigs.csv")
trial2 <- loadContigs(BD, format = "BD")
expect_identical(trial2,
getdata("load", "loadContigs_BD")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_BD"))
)

WAT3R <- read.csv("https://www.borch.dev/uploads/contigs/WAT3R_contigs.csv")
trial3 <- loadContigs(WAT3R, format = "WAT3R")
expect_identical(trial3,
getdata("load", "loadContigs_WAT3R")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_WAT3R"))
)

data("contig_list")
trial4 <- loadContigs(contig_list[[1]], format = "10X")
expect_identical(trial4,
getdata("load", "loadContigs_10x")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_10x"))
)


MIXCR <- read.csv("https://www.borch.dev/uploads/contigs/MIXCR_contigs.csv")
trial5 <- loadContigs(MIXCR, format = "MiXCR")
expect_identical(trial5,
getdata("load", "loadContigs_MiXCR")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_MiXCR"))
)

Immcantation <- read.csv("https://www.borch.dev/uploads/contigs/Immcantation_contigs.csv")
trial6 <- loadContigs(Immcantation, format = "Immcantation")
expect_identical(trial6,
getdata("load", "loadContigs_Immcantation")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Immcantation"))
)

OS <- read.csv("https://www.borch.dev/uploads/contigs/OS_contigs2.csv")
trial7 <- loadContigs(OS, format = "Omniscope")
expect_identical(trial7,
getdata("load", "loadContigs_Omniscope")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Omniscope"))
)

Parse <- read.csv("https://www.borch.dev/uploads/contigs/Parse_contigs.csv")
trial8 <- loadContigs(Parse, format = "ParseBio")
expect_identical(trial8,
getdata("load", "loadContigs_Parse")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Parse"))
)

Dandelion <- read.csv("https://www.borch.dev/uploads/contigs/Dandelion_contigs.csv")
trial9 <- loadContigs(Dandelion, format = "Dandelion")
expect_identical(trial9,
getdata("load", "loadContigs_Dandelion")
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_Dandelion"))
)
})

# TRUST4 parsing is checked twice: against the stored reference output for
# the remote example file, and against a hand-built single-row input.
test_that("loadContigs(format='TRUST4') works", {

# Reference comparison: the stored expectation is passed through the same
# all-NA row filter before being compared with the loadContigs() output.
TRUST4 <- read.csv("https://www.borch.dev/uploads/contigs/TRUST4_contigs.csv")
expect_identical(
loadContigs(TRUST4, format = "TRUST4"),
rmAllNaRowsFromLoadContigs(getdata("load", "loadContigs_TRUST4"))
)

# Minimal input: one barcode where only `chain2` carries a contig string;
# `chain1` and both secondary chains are "*".
oneRowTrust4Input <- structure(
list(
`#barcode` = "CGTAGCGGTGATAAGT-1",
cell_type = "B",
chain1 = "*",
chain2 = "IGKV1D-43,*,IGKJ1,IGKC,TGTCAACAGTATAGTAGTGTCCCCTGGACGTTC,CQQYSSVPWTF,6.00,CGTAGCGGTGATAAGT-1_2,76.00,0",
secondary_chain1 = "*",
secondary_chain2 = "*"
),
row.names = c(NA, -1L),
class = "data.frame"
)

# Expected output: a one-element list holding a single row whose values
# correspond to the comma-separated fields of `chain2` above. Note the "*"
# D-gene field is expected as "None" and `reads` stays a character string.
expectedParsedTrust4Data <- list(
structure(
list(
barcode = "CGTAGCGGTGATAAGT-1",
v_gene = "IGKV1D-43",
d_gene = "None",
j_gene = "IGKJ1",
c_gene = "IGKC",
cdr3_nt = "TGTCAACAGTATAGTAGTGTCCCCTGGACGTTC",
cdr3 = "CQQYSSVPWTF",
reads = "6.00",
chain = "IGK"
),
row.names = 1L,
class = "data.frame"
)
)

# expect_identical() also compares attributes, so the row names and class
# given above are part of the expectation.
expect_identical(
loadContigs(oneRowTrust4Input, format = "TRUST4"),
expectedParsedTrust4Data
)
})
})

# TODO Add tests for .json and AIRR
# TODO Would be nice to have a dir option

0 comments on commit 19cabb6

Please sign in to comment.