-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update repenrich tool (and repenrich-edger) (#676)
* Update edgeR_repenrich.R * style R code * lint plots and update tests * update test * use macros for both tools * Drop depracted Bio.Alphabet * pipe and fix samtools command line * simplify samtools management * bedtools 2.31.1 is not compatible with repenrich python ! stay at 2.23.0 * Update .shed.yml
- Loading branch information
Showing
9 changed files
with
348 additions
and
374 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,228 +1,192 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# A command-line interface to edgeR for use with Galaxy edger-repenrich | ||
# written by Christophe Antoniewski [email protected] 2017.05.30 | ||
|
||
|
||
# setup R error handling to go to stderr | ||
options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | ||
options(show.error.messages = FALSE, error = function() { | ||
cat(geterrmessage(), file = stderr()) | ||
q("no", 1, FALSE) | ||
}) | ||
|
||
# To not crash galaxy with an UTF8 error with not-US LC settings. | ||
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | ||
|
||
# load libraries | ||
library("getopt") | ||
library("tools") | ||
library("rjson") | ||
suppressPackageStartupMessages({ | ||
library("edgeR") | ||
library("limma") | ||
}) | ||
|
||
options(stringAsFactors = FALSE, useFancyQuotes = FALSE) | ||
args <- commandArgs(trailingOnly = TRUE) | ||
|
||
# get options, using the spec as defined by the enclosed list. | ||
# we read the options from the default: commandArgs(TRUE). | ||
spec <- matrix(c( | ||
"quiet", "q", 0, "logical", | ||
"help", "h", 0, "logical", | ||
"outfile", "o", 1, "character", | ||
"countsfile", "n", 1, "character", | ||
"factorName", "N", 1, "character", | ||
"levelNameA", "A", 1, "character", | ||
"levelNameB", "B", 1, "character", | ||
"levelAfiles", "a", 1, "character", | ||
"levelBfiles", "b", 1, "character", | ||
"alignmentA", "i", 1, "character", | ||
"alignmentB", "j", 1, "character", | ||
"plots" , "p", 1, "character"), | ||
byrow=TRUE, ncol=4) | ||
spec <- matrix( | ||
c( | ||
"quiet", "q", 0, "logical", | ||
"outfile", "o", 1, "character", | ||
"countsfile", "n", 1, "character", | ||
"factorName", "N", 1, "character", | ||
"levelNameA", "A", 1, "character", | ||
"levelNameB", "B", 1, "character", | ||
"levelAfiles", "a", 1, "character", | ||
"levelBfiles", "b", 1, "character", | ||
"alignmentA", "i", 1, "character", | ||
"alignmentB", "j", 1, "character", | ||
"plots", "p", 1, "character" | ||
), | ||
byrow = TRUE, ncol = 4 | ||
) | ||
opt <- getopt(spec) | ||
|
||
# if help was asked for print a friendly message | ||
# and exit with a non-zero error code | ||
if (!is.null(opt$help)) { | ||
cat(getopt(spec, usage=TRUE)) | ||
q(status=1) | ||
} | ||
|
||
# enforce the following required arguments | ||
if (is.null(opt$outfile)) { | ||
cat("'outfile' is required\n") | ||
q(status=1) | ||
} | ||
if (is.null(opt$levelAfiles) | is.null(opt$levelBfiles)) { | ||
cat("input count files are required for both levels\n") | ||
q(status=1) | ||
} | ||
if (is.null(opt$alignmentA) | is.null(opt$alignmentB)) { | ||
cat("total aligned read files are required for both levels\n") | ||
q(status=1) | ||
} | ||
|
||
verbose <- if (is.null(opt$quiet)) { | ||
TRUE | ||
} else { | ||
FALSE | ||
} | ||
|
||
suppressPackageStartupMessages({ | ||
library("edgeR") | ||
library("limma") | ||
}) | ||
|
||
# build levels A and B file lists | ||
|
||
library("rjson") | ||
filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error") | ||
filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error") | ||
listA <- list() | ||
indice = 0 | ||
indice <- 0 | ||
listA[["level"]] <- opt$levelNameA | ||
for (file in filesA) { | ||
indice = indice +1 | ||
listA[[paste0(opt$levelNameA,"_",indice)]] <- read.delim(file, header=FALSE) | ||
} | ||
indice <- indice + 1 | ||
listA[[paste0(opt$levelNameA, "_", indice)]] <- read.delim(file, header = FALSE) | ||
} | ||
listB <- list() | ||
indice = 0 | ||
indice <- 0 | ||
listB[["level"]] <- opt$levelNameB | ||
for (file in filesB) { | ||
indice = indice +1 | ||
listB[[paste0(opt$levelNameB,"_",indice)]] <- read.delim(file, header=FALSE) | ||
} | ||
indice <- indice + 1 | ||
listB[[paste0(opt$levelNameB, "_", indice)]] <- read.delim(file, header = FALSE) | ||
} | ||
|
||
# build a counts table | ||
counts <- data.frame(row.names=listA[[2]][,1]) | ||
counts <- data.frame(row.names = listA[[2]][, 1]) | ||
for (element in names(listA[-1])) { | ||
counts<-cbind(counts, listA[[element]][,4]) | ||
} | ||
counts <- cbind(counts, listA[[element]][, 4]) | ||
} | ||
for (element in names(listB[-1])) { | ||
counts<-cbind(counts, listB[[element]][,4]) | ||
} | ||
colnames(counts)=c(names(listA[-1]), names(listB[-1])) | ||
counts <- cbind(counts, listB[[element]][, 4]) | ||
} | ||
colnames(counts) <- c(names(listA[-1]), names(listB[-1])) | ||
|
||
# build aligned counts vector | ||
|
||
filesi <- fromJSON(opt$alignmentA, method = "C", unexpected.escape = "error") | ||
filesj <- fromJSON(opt$alignmentB, method = "C", unexpected.escape = "error") | ||
sizes <- c() | ||
for (file in filesi) { | ||
sizes <- c(sizes, read.delim(file, header=TRUE)[1,1]) | ||
} | ||
sizes <- c(sizes, read.delim(file, header = TRUE)[1, 1]) | ||
} | ||
for (file in filesj) { | ||
sizes <- c(sizes, read.delim(file, header=TRUE)[1,1]) | ||
} | ||
sizes <- c(sizes, read.delim(file, header = TRUE)[1, 1]) | ||
} | ||
|
||
# build a meta data object | ||
|
||
meta <- data.frame( | ||
row.names=colnames(counts), | ||
condition=c(rep(opt$levelNameA,length(filesA)), rep(opt$levelNameB,length(filesB)) ), | ||
libsize=sizes | ||
row.names = colnames(counts), | ||
condition = c(rep(opt$levelNameA, length(filesA)), rep(opt$levelNameB, length(filesB))), | ||
libsize = sizes | ||
) | ||
|
||
|
||
# Define the library size and conditions for the GLM | ||
libsize <- meta$libsize | ||
condition <- factor(meta$condition) | ||
design <- model.matrix(~0+condition) | ||
colnames(design) <- levels(meta$condition) | ||
|
||
design <- model.matrix(~ 0 + condition) | ||
colnames(design) <- levels(condition) | ||
|
||
# Build a DGE object for the GLM | ||
y <- DGEList(counts=counts, lib.size=libsize) | ||
y <- DGEList(counts = counts, lib.size = libsize) | ||
|
||
# Normalize the data | ||
y <- calcNormFactors(y) | ||
y$samples | ||
# plotMDS(y) latter | ||
|
||
# Estimate the variance | ||
y <- estimateGLMCommonDisp(y, design) | ||
y <- estimateGLMTrendedDisp(y, design) | ||
y <- estimateGLMTagwiseDisp(y, design) | ||
# plotBCV(y) latter | ||
|
||
# Builds and outputs an object to contain the normalized read abundance in counts per million of reads | ||
cpm <- cpm(y, log=FALSE, lib.size=libsize) | ||
cpm <- cpm(y, log = FALSE, lib.size = libsize) | ||
cpm <- as.data.frame(cpm) | ||
colnames(cpm) <- colnames(counts) | ||
if (!is.null(opt$countsfile)) { | ||
normalizedAbundance <- data.frame(Tag=rownames(cpm)) | ||
normalizedAbundance <- data.frame(Tag = rownames(cpm)) | ||
normalizedAbundance <- cbind(normalizedAbundance, cpm) | ||
write.table(normalizedAbundance, file=opt$countsfile, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE) | ||
write.table(normalizedAbundance, file = opt$countsfile, sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE) | ||
} | ||
|
||
# Conduct fitting of the GLM | ||
yfit <- glmFit(y, design) | ||
|
||
# Initialize result matrices to contain the results of the GLM | ||
results <- matrix(nrow=dim(counts)[1],ncol=0) | ||
logfc <- matrix(nrow=dim(counts)[1],ncol=0) | ||
results <- matrix(nrow = dim(counts)[1], ncol = 0) | ||
logfc <- matrix(nrow = dim(counts)[1], ncol = 0) | ||
|
||
# Make the comparisons for the GLM | ||
my.contrasts <- makeContrasts( | ||
paste0(opt$levelNameA,"_",opt$levelNameB," = ", opt$levelNameA, " - ", opt$levelNameB), | ||
paste0(opt$levelNameA, "_", opt$levelNameB, " = ", opt$levelNameA, " - ", opt$levelNameB), | ||
levels = design | ||
) | ||
|
||
# Define the contrasts used in the comparisons | ||
allcontrasts = paste0(opt$levelNameA," vs ",opt$levelNameB) | ||
allcontrasts <- paste0(opt$levelNameA, " vs ", opt$levelNameB) | ||
|
||
# Conduct a for loop that will do the fitting of the GLM for each comparison | ||
# Put the results into the results objects | ||
lrt <- glmLRT(yfit, contrast=my.contrasts[,1]) | ||
plotSmear(lrt, de.tags=rownames(y)) | ||
title(allcontrasts) | ||
res <- topTags(lrt,n=dim(c)[1],sort.by="none")$table | ||
results <- cbind(results,res[,c(1,5)]) | ||
logfc <- cbind(logfc,res[c(1)]) | ||
lrt <- glmLRT(yfit, contrast = my.contrasts[, 1]) | ||
res <- topTags(lrt, n = dim(c)[1], sort.by = "none")$table | ||
results <- cbind(results, res[, c(1, 5)]) | ||
logfc <- cbind(logfc, res[c(1)]) | ||
|
||
# Add the repeat types back into the results. | ||
# We should still have the same order as the input data | ||
results$class <- listA[[2]][,2] | ||
results$type <- listA[[2]][,3] | ||
|
||
results$class <- listA[[2]][, 2] | ||
results$type <- listA[[2]][, 3] | ||
# Sort the results table by the FDR | ||
results <- results[with(results, order(FDR)), ] | ||
|
||
# Save the results | ||
write.table(results, opt$outfile, quote=FALSE, sep="\t", col.names=FALSE) | ||
|
||
# Plot Fold Changes for repeat classes and types | ||
|
||
# open the device and plots | ||
if (!is.null(opt$plots)) { | ||
if (verbose) cat("creating plots\n") | ||
pdf(opt$plots) | ||
plotMDS(y, main="Multidimensional Scaling Plot Of Distances Between Samples") | ||
plotBCV(y, xlab="Gene abundance (Average log CPM)", main="Biological Coefficient of Variation Plot") | ||
plotMDS(y, main = "Multidimensional Scaling Plot Of Distances Between Samples") | ||
plotBCV(y, xlab = "Gene abundance (Average log CPM)", main = "Biological Coefficient of Variation Plot") | ||
logFC <- results[, "logFC"] | ||
# Plot the repeat classes | ||
classes <- with(results, reorder(class, -logFC, median)) | ||
classes | ||
par(mar=c(6,10,4,1)) | ||
boxplot(logFC ~ classes, data=results, outline=FALSE, horizontal=TRUE, | ||
las=2, xlab="log2(Fold Change)", main=paste0(allcontrasts, ", by Class")) | ||
abline(v=0) | ||
par(mar = c(6, 10, 4, 1)) | ||
boxplot(logFC ~ classes, | ||
data = results, outline = FALSE, horizontal = TRUE, | ||
las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Class") | ||
) | ||
abline(v = 0) | ||
# Plot the repeat types | ||
types <- with(results, reorder(type, -logFC, median)) | ||
boxplot(logFC ~ types, data=results, outline=FALSE, horizontal=TRUE, | ||
las=2, xlab="log2(Fold Change)", main=paste0(allcontrasts, ", by Type"), yaxt="n") | ||
axis(2, cex.axis=(1*28)/(length(levels(types))), | ||
at=seq(from=1, to=length(levels(types))), | ||
labels=levels(types), las=2) | ||
abline(v=0) | ||
boxplot(logFC ~ types, | ||
data = results, outline = FALSE, horizontal = TRUE, | ||
las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Type") | ||
) | ||
abline(v = 0) | ||
# volcano plot | ||
TEdata = cbind(rownames(results), as.data.frame(results), score=-log(results$FDR, 10)) | ||
colnames(TEdata) = c("Tag","log2FC", "FDR", "Class", "Type", "score") | ||
color = ifelse(TEdata$FDR<0.05, "red", "black") | ||
s <- subset(TEdata, FDR<0.01) | ||
with(TEdata, plot(log2FC, score, pch=20, col=color, main="Volcano plot (all tag types)", ylab="-log10(FDR)")) | ||
text(s[,2], s[,6], labels = s[,5], pos = seq(from=1, to=3), cex=0.5) | ||
TEdata <- cbind(rownames(results), as.data.frame(results), score = -log(results$FDR, 10)) | ||
colnames(TEdata) <- c("Tag", "log2FC", "FDR", "Class", "Type", "score") | ||
color <- ifelse(TEdata$FDR < 0.05, "red", "black") | ||
s <- subset(TEdata, FDR < 0.01) | ||
with(TEdata, plot(log2FC, score, pch = 20, col = color, main = "Volcano plot (all tag types)", ylab = "-log10(FDR)")) | ||
text(s[, 2], s[, 6], labels = s[, 5], pos = seq(from = 1, to = 3), cex = 0.5) | ||
} | ||
|
||
# close the plot device | ||
if (!is.null(opt$plots)) { | ||
cat("closing plot device\n") | ||
dev.off() | ||
cat("closing plot device\n") | ||
dev.off() | ||
} | ||
|
||
cat("Session information:\n\n") | ||
# Save the results | ||
results <- cbind(TE_item = rownames(results), results) | ||
colnames(results) <- c("TE_item", "log2FC", "FDR", "Class", "Type") | ||
results$log2FC <- format(results$log2FC, digits = 5) | ||
results$FDR <- format(results$FDR, digits = 5) | ||
write.table(results, opt$outfile, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE) | ||
|
||
cat("Session information:\n\n") | ||
sessionInfo() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<macros> | ||
<token name="@TOOL_VERSION@">1.83</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
<token name="@PROFILE@">23.0</token> | ||
|
||
<xml name="requirements"> | ||
<requirements> | ||
<requirement type="package" version="4.0.2">bioconductor-edger</requirement> | ||
<requirement type="package" version="3.58.1">bioconductor-limma</requirement> | ||
<requirement type="package" version="1.20.4">r-getopt</requirement> | ||
<requirement type="package" version="0.2.21">r-rjson</requirement> | ||
<requirement type="package" version="1.0.0">bowtie</requirement> | ||
<requirement type="package" version="1.19.2">samtools</requirement> | ||
<requirement type="package" version="2.23.0">bedtools</requirement> | ||
<requirement type="package" version="@TOOL_VERSION@">biopython</requirement> | ||
</requirements> | ||
</xml> | ||
</macros> |
Oops, something went wrong.