Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update functions on handling other modification bases #134

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
dcdd060
update other modification
yixuan-chen-elisa Dec 7, 2023
6d98f49
Update BSseq-mod-class.Rd
yixuan-chen-elisa Dec 7, 2023
ad982ff
Update BSseq-mod-class.R
yixuan-chen-elisa Dec 7, 2023
b6a1307
Delete R/BSseq-mod-class.R
yixuan-chen-elisa Apr 3, 2024
3f41463
Delete R/BSseq_mod_utils.R
yixuan-chen-elisa Apr 3, 2024
f102eef
Update DESCRIPTION
yixuan-chen-elisa Apr 3, 2024
7ab5871
Update NAMESPACE
yixuan-chen-elisa Apr 3, 2024
b03b774
Update NAMESPACE
yixuan-chen-elisa Apr 3, 2024
0848120
Delete man/BSseq-mod-class.Rd
yixuan-chen-elisa Apr 3, 2024
fcf0800
Delete man/BSseq_mod.Rd
yixuan-chen-elisa Apr 3, 2024
a3ebd51
Delete man/getMeth_mod.Rd
yixuan-chen-elisa Apr 3, 2024
39807ce
Delete man/getCoverage_mod.Rd
yixuan-chen-elisa Apr 3, 2024
52430b8
Update BSseq-class.R
yixuan-chen-elisa Apr 3, 2024
5c0baff
Update BSseq-class.R
yixuan-chen-elisa Apr 3, 2024
c61ce30
Update BSseq-class.R
yixuan-chen-elisa Apr 3, 2024
af50c0f
Update read.modkit.R
yixuan-chen-elisa Apr 3, 2024
f728ddf
Update NEWS.Rd
yixuan-chen-elisa Apr 3, 2024
541a77d
Update BSseq-class.Rd
yixuan-chen-elisa Apr 3, 2024
542ab68
Update BSseq-class.Rd
yixuan-chen-elisa Apr 3, 2024
b078f02
Update BSseq-class.Rd
yixuan-chen-elisa Apr 3, 2024
da762d0
Update BSseq-class.Rd
yixuan-chen-elisa Apr 3, 2024
9b311df
Update BSseq-class.Rd
yixuan-chen-elisa Apr 3, 2024
83d3cd5
Update read.modkit.Rd
yixuan-chen-elisa Apr 3, 2024
c0c1f9e
Update test_read.modkit.R
yixuan-chen-elisa Apr 3, 2024
5b64dfe
Merge branch 'devel' into update-other-mod
yixuan-chen-elisa Apr 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 51 additions & 52 deletions R/read.modkit.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,76 +3,75 @@ read.modkit <- function(files,
rmZeroCov = FALSE,
strandCollapse = TRUE){
gr_list <- list()
sampleNames <- sub("\\.bed$", "", basename(files))
sampleNames <- sub("\\.bed.gz$", "", basename(files))
if (!is.null(colData)){
rownames(colData) <- sampleNames
}

for (i in seq_along(files)){
data <- read.table(files[i], header = FALSE, sep="\t",
stringsAsFactors=FALSE, quote="")
gr <- GRanges(seqnames = data$V1,
ranges = IRanges(start = data$V2+1, end = data$V3))
mcols(gr)$m <- data$V12
mcols(gr)$cov <- data$V12 + data$V13
mcols(gr)$filter <- data$V16
if (any(data$V14 != 0)){
mcols(gr)$other_mod <- data$V14
mcols(gr)$cov_other_mod <- data$V14 + data$V13
}

if (length(unique(data$V4)) == 2){
gr <- GRanges(seqnames = data[data$V4 == "m", ]$V1,
ranges = IRanges(start = data[data$V4 == "m", ]$V2+1,
end = data[data$V4 == "m", ]$V3))

mcols(gr)$m <- data[data$V4 == "m", ]$V12
mcols(gr)$h <- data[data$V4 != "m", ]$V12
mcols(gr)$u <- data[data$V4 == "m", ]$V13
mcols(gr)$filter <- data[data$V4 == "m", ]$V16
}else{
gr <- GRanges(seqnames = data$V1,
ranges = IRanges(start = data$V2+1, end = data$V3))

mcols(gr)$m <- data$V12
mcols(gr)$u <- data$V13
mcols(gr)$filter <- data$V16
}

names(gr) <- sampleNames[i]
gr_list[[sampleNames[i]]] <- gr
}

overlap_gr <- Reduce(subsetByOverlaps, gr_list)

m_cov_list <- lapply(gr_list, function(gr){
m_u_list <- lapply(gr_list, function(gr){
overlap_data <- gr[gr %over% overlap_gr]
if (!is.null(gr$other_mod)) {
data.frame(m = overlap_data$m, cov = overlap_data$cov,
filter = overlap_data$filter,
other_mod = overlap_data$other_mod,
cov_other_mod = overlap_data$cov_other_mod)
if (!is.null(gr$h)) {
data.frame(m = overlap_data$m, u = overlap_data$u,
h = overlap_data$h, filter = overlap_data$filter)
} else {
data.frame(m = overlap_data$m, cov = overlap_data$cov,
filter = overlap_data$filter)
data.frame(m = overlap_data$m, u = overlap_data$u,
filter = overlap_data$filter)
}
})

m <- do.call(cbind, lapply(m_cov_list, `[[`, "m"))
cov <- do.call(cbind, lapply(m_cov_list, `[[`, "cov"))
filter <- do.call(cbind, lapply(m_cov_list, `[[`, "filter"))
other_mod <- do.call(cbind, lapply(m_cov_list, `[[`, "other_mod"))
cov_other_mod <- do.call(cbind, lapply(m_cov_list, `[[`, "cov_other_mod"))

bsseq_objs <- list()
bsseq_obj_m <- BSseq(M = as.matrix(m), Cov = as.matrix(cov),
Filtered = as.matrix(filter),
coef = NULL, se.coef = NULL,
pos = start(overlap_gr), trans = NULL,
parameters = NULL, pData = colData, gr = NULL,
chr = as.vector(seqnames(overlap_gr)),
sampleNames = sampleNames, rmZeroCov = rmZeroCov)
m <- do.call(cbind, lapply(m_u_list, `[[`, "m"))
u <- do.call(cbind, lapply(m_u_list, `[[`, "u"))
h <- do.call(cbind, lapply(m_u_list, `[[`, "h"))
filter <- do.call(cbind, lapply(m_u_list, `[[`, "filter"))

bsseq_objs[[1]] <- bsseq_obj_m

if (!is.null(other_mod)){
bsseq_obj_other_mod <- BSseq(M = as.matrix(other_mod),
Cov = as.matrix(cov_other_mod),
Filtered = as.matrix(filter),
coef = NULL, se.coef = NULL,
pos = start(overlap_gr), trans = NULL,
parameters = NULL, pData = colData, gr = NULL,
chr = as.vector(seqnames(overlap_gr)),
sampleNames = sampleNames, rmZeroCov = rmZeroCov)

bsseq_objs[[2]] <- bsseq_obj_other_mod
}

if (strandCollapse){
for (i in seq_along(bsseq_objs)) {
bsseq:::strandCollapse(bsseq_objs[[i]])}
}
if (!is.null(h)){
bsseq_obj <- BSseq(M = as.matrix(m + h),
Cov = as.matrix(m + u + h),
Filtered = as.matrix(filter),
coef = NULL, se.coef = NULL,
pos = start(overlap_gr), trans = NULL,
parameters = NULL, pData = colData, gr = NULL,
chr = as.vector(seqnames(overlap_gr)),
sampleNames = sampleNames, rmZeroCov = rmZeroCov)
if (strandCollapse) {strandCollapse(bsseq_obj)}
}else{
bsseq_obj <- BSseq(M = as.matrix(m), Cov = as.matrix(u + m),
Filtered = as.matrix(filter),
coef = NULL, se.coef = NULL,
pos = start(overlap_gr), trans = NULL,
parameters = NULL, pData = colData, gr = NULL,
chr = as.vector(seqnames(overlap_gr)),
sampleNames = sampleNames, rmZeroCov = rmZeroCov)
if (strandCollapse) {strandCollapse(bsseq_obj)}
}

return(bsseq_objs)
return(bsseq_obj)
}
26 changes: 12 additions & 14 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,18 @@
program which produced these files have not been supported for
years. }
\item{Minor code and documentation chnages. Fixed usage of
\code{class(x) == "y"} and added examples to some man pages.}
\item{\code{Bsseq()} is modified to store filtered matrix (ambigious
modification status) for nanopore data.}
\item{New function \code{read.modkit()} to read in methyl bed files
generated from modkit and construct Bsseq objects. When there is
no other modifications, one Bsseq object is constructed, and when
there is other modification present, its methylation and
corresponding coverage are also stored and two Bsseq objects are
constructed. }
\item{New function \code{read.modbam2bed()} to read in, extract
methylation, coverage and filtered data and create Bsseq object
from methyl bed files obtained by modbam2bed. Some instructions
for using modbam2bed is also provided.}
}
\code{class(x) == "y"} and added examples to some man pages.}
\item{\code{Bsseq()} is modified to store filtered matrix (ambigious
modification status) for nanopore data.
}
\item{New function \code{read.modkit()} to read in methyl bed files
generated from modkit and construct Bsseq objects. Other modification
such as hydroxymethylation is integrated into methylation matrix when present.
}
\item{New function \code{read.modbam2bed()} to read in, extract methylation,
coverage and filtered data and create Bsseq object from methyl bed files
obtained by modbam2bed. Some instructions for using modbam2bed is also provided.
}
}


Expand Down
Binary file not shown.
Binary file not shown.
Binary file removed inst/extdata/modkit/chr22.HG002.top1000.bed.gz
Binary file not shown.
Binary file not shown.
14 changes: 7 additions & 7 deletions man/read.modkit.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ are converted to bedMethyl (BED) files using \href{https://github.com/nanoporete
Modkit outputs modified reads, unmodified reads, ambiguous modification reads (reads where the probability was below the threshold and usually failing the lowest 10th percentile), and other modified reads.}

\subsection{modkit to Bsseq object}{
After creating BED files using modkit, the BED files are read in and the Bsseq object is constructed via \code{read.modkit()} function. The function reads in BED files, extract genomic regions, methylation, coverage, ambiguous modification status data and sample information and then construct Bsseq object using \code{BSseq} function within the package. If there are other modified reads, two Bsseq objects are constructed with the first one using major modified reads and their corresponding coverage and the second one using the other modified reads and their corresponding coverage.}
After creating BED files using modkit, the BED files are read in and the Bsseq object is constructed via \code{read.modkit()} function. The function reads in BED files, extract genomic regions, methylation, coverage, ambiguous modification status data and sample information and then construct Bsseq object using \code{BSseq} function within the package. Other modification bases such as hydroxymethylation are extracted and added to the methylation matrix when present.}
}

\examples{
# only one modification and one BSseq object is constructed.
files <- c(system.file("extdata/modkit/chr22.HG002.top1000.bed.gz", package = "bsseq"))
bsseq_nano <- bsseq::read.modkit(files, rmZeroCov = FALSE, strandCollapse=FALSE)
# No other modification present
files <- c(system.file("extdata/modkit/chr21.chr22.HG002.top1000.bed.gz", package = "bsseq"))
bsseq_nano <- read.modkit(files, rmZeroCov = FALSE, strandCollapse=FALSE)

# there is other modification, so two BSseq objects are constructed.
files <- c(system.file("extdata/modkit/chr22.HG002.top1000.other_mod.bed.gz",package = "bsseq"))
bsseq_nano <- bsseq::read.modkit(files, rmZeroCov = FALSE, strandCollapse=FALSE)
# Other modification present
files <- c(system.file("extdata/modkit/Hypo1.first50Bed.txt",package = "bsseq"))
bsseq_nano <- read.modkit(files, rmZeroCov = FALSE, strandCollapse=FALSE)
}
17 changes: 14 additions & 3 deletions tests/testthat/test_read.modkit.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
context("read.modkit")

# TODO: Re-factor read.modkit() and update tests accordingly
test_that("read.modkit() works for BED files", {
infile <- system.file("extdata", "modkit/chr22.HG002.top1000.other_mod.bed.gz",
test_that("read.modkit() works for BED files without 5hmc", {
infile <- system.file("extdata", "modkit/chr21.chr22.HG002.top1000.other_mod.bed.gz",
package = "bsseq")
bsseq <- read.modkit(files = infile,
colData = NULL,
rmZeroCov = FALSE,
strandCollapse = TRUE)


lapply(bsseq, function(x) {expect_is(x, "BSseq")})
})

test_that("read.modkit() works for BED files with 5hmc", {
infile <- system.file("extdata", "modkit/Hypo1.first40Bed.txt",
package = "bsseq")
bsseq <- read.modkit(files = infile,
colData = NULL,
rmZeroCov = FALSE,
strandCollapse = TRUE)

lapply(bsseq, function(x) {expect_is(x, "BSseq")})
})
Loading