Skip to content

Commit

Permalink
remove submodule structure (#101)
Browse files Browse the repository at this point in the history
* test full end2end pipeline
* removed git submodule structure
* version bump

Co-authored-by: Michaela Müller <[email protected]>
  • Loading branch information
c-mertes and Michaela Müller authored Jul 21, 2020
1 parent fd32ed3 commit d254aca
Show file tree
Hide file tree
Showing 61 changed files with 2,963 additions and 65 deletions.
9 changes: 0 additions & 9 deletions .gitmodules

This file was deleted.

6 changes: 6 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@ script:
- bcftools --version
- drop --version
- python --version

- mkdir drop_demo
- cd drop_demo
- drop demo
- snakemake -n
- snakemake --jobs 2 --cores 2
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Detection of RNA Outlier Pipeline
[![Pipeline status](https://travis-ci.org/gagneurlab/drop.svg?branch=master)](https://travis-ci.org/gagneurlab/drop)
[![Version](https://img.shields.io/badge/Version-0.9.0-green.svg)](https://github.com/gagneurlab/drop/master)
[![Version](https://img.shields.io/badge/Version-0.9.1-green.svg)](https://github.com/gagneurlab/drop/master)
[![Version](https://readthedocs.org/projects/gagneurlab-drop/badge/?version=latest)](https://gagneurlab-drop.readthedocs.io/en/latest)

The manuscript main file, supplementary figures and table can be found in the manuscript folder or in
Expand Down
84 changes: 36 additions & 48 deletions conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "drop" %}
{% set version = "0.9.0" %}
{% set version = "0.9.1" %}

package:
name: "{{ name|lower }}"
Expand All @@ -10,55 +10,36 @@ source:

build:
number: 0
script:
- "{{ PYTHON }} -m pip install -vv git+https://github.com/gagneurlab/wbuild.git#egg=wbuild"
- "${R} -e 'BiocManager::install(\"mumichae/tMAE\", dependencies=FALSE, update=FALSE, ask=FALSE)'"
- "{{ PYTHON }} -m pip install . -vv"
rpaths:
- lib/R/lib/
- lib/
noarch: python
entry_points:
- drop=drop.cli:main
script: "{{ PYTHON }} -m pip install . -vv"

requirements:
build:
- {{ compiler('c') }}

host:
- python
- python >=3.6
- pip

# wbuild requirements
- pyyaml>=4.2b1
- pytest-runner

# tMAE requirements
- r-base>=4.0.0
- r-devtools
- r-biocmanager
- r-data.table
- r-ggplot2
- r-dplyr
- bioconductor-DESeq2
- bioconductor-GenomicScores
- wbuild >=1.7.0

run:
- python
- python >=3.6
- pandas
- Click>=7.0
- Click >=7.0
- click-log
- python-dateutil

# snakemake/wbuild
- snakemake=>5.5.2
- snakemake >=5.5.2
- wbuild >=1.7.0
- pandoc
- graphviz
- pyyaml>=4.2b1


# command line tools
- tabix
- samtools>=1.7
- bcftools>=1.7
- gatk4>=4.0.4
- star>=2.7
- samtools >=1.7
- bcftools >=1.7
- gatk4 >=4.0.4
- star >=2.7

# R dependencies
- r-base>=4.0.0
Expand All @@ -72,35 +53,42 @@ requirements:
- r-tidyr
- r-magrittr
- r-devtools

- r-tmae

# bioconductor packages
- bioconductor-deseq2
- bioconductor-GenomicScores
- bioconductor-outrider
- bioconductor-fraser
- bioconductor-variantannotation
- bioconductor-bsgenome.hsapiens.ucsc.hg19
#- bioconductor-mafdb.gnomad.r2.1.hs37d5
#- bioconductor-mafdb.gnomad.r2.1.grch38
#- bioconductor-mafdb.gnomad.r2.1.hs37d5
#- bioconductor-mafdb.gnomad.r2.1.grch38

test:
imports:
- drop
- wbuild
commands:
- ${R} -e "library(tMAE)"
- wbuild --version
- drop --version
- drop --help
requires:
- pytest

about:
home: "https://gagneurlab-drop.readthedocs.io"
home: https://github.com/gagneurlab/drop
license: MIT
license_family: MIT
license_file: ../LICENSE
summary: "Detection of RNA Outliers Pipeline"
doc_url:
dev_url:
license_family: OTHER
summary: Detection of RNA Outlier Pipeline
doc_url: https://gagneurlab-drop.readthedocs.io/en/latest/
dev_url: https://github.com/gagneurlab/drop

extra:
container:
# click requires a unicode locale when used with Python 3
# extended-base generates en_US.UTF-8 locale and sets LC_ALL, LANG properly
extended-base: true
identifiers:
- https://dx.doi.org/10.21203/rs.2.19080/v1
recipe-maintainers:
- c-mertes
- mumichae

4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
# -- Project information -----------------------------------------------------

project = 'DROP'
copyright = '2019, Michaela Müller'
copyright = '2020, Michaela Müller'
author = 'Michaela Müller'

# The full version, including alpha/beta/rc tags
release = '0.9.0'
release = '0.9.1'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion drop/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@click.group()
@click_log.simple_verbosity_option(logger)
@click.version_option('0.9.0',prog_name='drop')
@click.version_option('0.9.1',prog_name='drop')
def main():
pass

Expand Down
1 change: 0 additions & 1 deletion drop/modules/aberrant-expression-pipeline
Submodule aberrant-expression-pipeline deleted from f5d192
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#'---
#' title: Counts Overview
#' author: mumichae, salazar
#' wb:
#' params:
#' - ids: '`sm parser.outrider_ids`'
#' - tmpdir: '`sm drop.getMethodPath(METHOD, "tmp_dir")`'
#' input:
#' - summaries: '`sm expand(config["htmlOutputPath"] +
#' "/AberrantExpression/Counting/{annotation}/Summary_{dataset}.html",
#' annotation=list(config["geneAnnotation"].keys()), dataset=parser.outrider_ids)`'
#' output:
#' html_document:
#' code_folding: hide
#' code_download: TRUE
#'---

# Persist the snakemake object so the script can be re-run interactively;
# the commented readRDS() line below restores it.
saveRDS(snakemake, file.path(snakemake@params$tmpdir, "counting_overview.snakemake"))
# snakemake <- readRDS(".drop/tmp/AE/counting_overview.snakemake")

# Obtain the annotations and datasets
# NOTE(review): the wbuild header declares an `ids` param that is not used
# here; confirm that config$aberrantExpression$groups always lists the same
# datasets the `summaries` input is expanded over.
gene_annotation_names <- names(snakemake@config$geneAnnotation)
datasets <- snakemake@config$aberrantExpression$groups

#+ echo=FALSE, results="asis"
# Emit raw HTML: one heading per dataset, followed by one link per gene
# annotation. The heading is written once per dataset (not once per
# dataset/annotation pair) and each link names its annotation so that
# several annotations can be told apart.
for (name in datasets) {
  links <- vapply(gene_annotation_names, function(version) {
    paste0(
      "<br/>", "<a href='AberrantExpression/Counting/", version,
      "/Summary_", name, ".html' >Count Summary (", version, ")</a>"
    )
  }, character(1))
  cat(paste0(
    "<h1>Dataset: ", name, "</h1>",
    "<p>",
    paste(links, collapse = ""),
    "<br/>", "</p>"
  ))
}
160 changes: 160 additions & 0 deletions drop/modules/aberrant-expression-pipeline/Scripts/Counting/Summary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#'---
#' title: "Counts Summary: `r gsub('_', ' ', snakemake@wildcards$dataset)`"
#' author:
#' wb:
#' params:
#' - tmpdir: '`sm drop.getMethodPath(METHOD, "tmp_dir")`'
#' input:
#' - ods: '`sm parser.getProcResultsDir() +
#' "/aberrant_expression/{annotation}/outrider/{dataset}/ods_unfitted.Rds"`'
#' - bam_cov: '`sm parser.getProcDataDir() +
#' "/aberrant_expression/{annotation}/outrider/{dataset}/bam_coverage.tsv"`'
#' output:
#' - wBhtml: '`sm config["htmlOutputPath"] +
#' "/AberrantExpression/Counting/{annotation}/Summary_{dataset}.html"`'
#' type: noindex
#' output:
#' html_document:
#' code_folding: hide
#' code_download: TRUE
#'---

# Persist the snakemake object so the script can be re-run interactively;
# the commented readRDS() line below restores it.
# The directory comes from the `tmpdir` param declared in the wbuild header
# (the same way the counting overview script builds its path) rather than
# from a config entry that is not declared anywhere in this script.
saveRDS(snakemake, file.path(snakemake@params$tmpdir, "counting_summary.snakemake"))
#snakemake <- readRDS(".drop/tmp/AE/counting_summary.snakemake")

suppressPackageStartupMessages({
library(OUTRIDER)
library(SummarizedExperiment)
library(GenomicAlignments)
library(ggplot2)
library(ggthemes)
library(cowplot)
library(data.table)
library(tidyr)
})

# Load the unfitted OUTRIDER dataset and extract its raw count matrix.
ods <- readRDS(snakemake@input$ods)
cnts_mtx <- counts(ods, normalized = FALSE)

#' Number of samples: `r ncol(ods)`
#'
#' # Count Quality Control
#'
#' Compare number of records vs. read counts
#'
# Join the per-sample BAM coverage table with the per-sample count totals.
bam_coverage <- fread(snakemake@input$bam_cov)
bam_coverage[, sampleID := as.character(sampleID)]
coverage_dt <- merge(bam_coverage,
                     data.table(sampleID = colnames(ods),
                                read_count = colSums(cnts_mtx)),
                     by = "sampleID", sort = FALSE)
# read count
setorder(coverage_dt, read_count)
coverage_dt[, count_rank := .I]
# ratio
coverage_dt[, counted_frac := read_count/record_count]
setorder(coverage_dt, counted_frac)
coverage_dt[, frac_rank := .I]

# size factors
ods <- estimateSizeFactors(ods)
# coverage_dt has been re-sorted above, so its row order no longer matches
# the column order of ods. Look the size factors up by sample ID instead of
# assigning them positionally, which would attach them to the wrong samples.
coverage_dt[, size_factors := sizeFactors(ods)[match(sampleID, colnames(ods))]]
setorder(coverage_dt, size_factors)
coverage_dt[, sf_rank := .I]

# Samples ranked by total number of counted reads.
p_depth <- ggplot(coverage_dt, aes(count_rank, read_count)) +
  geom_point() +
  theme_cowplot() +
  background_grid() +
  labs(title = "Obtained Read Counts", x="Sample Rank", y = "Reads Counted") +
  ylim(c(0,NA))

# Samples ranked by counted_frac (read_count / record_count). The value is a
# plain ratio and is not scaled to percent, so the axis is labelled as a
# fraction.
p_frac <- ggplot(coverage_dt, aes(frac_rank, counted_frac)) +
  geom_point() +
  theme_cowplot() +
  background_grid() +
  labs(title = "Obtained Read Count Ratio", x = "Sample Rank",
       y = "Fraction of Reads Counted") +
  ylim(c(0,NA))

#+ QC, fig.height=6, fig.width=12
plot_grid(p_depth, p_frac)

# Samples ranked by size factor.
p_sf <- ggplot(coverage_dt, aes(sf_rank, size_factors)) +
  geom_point() +
  ylim(c(0,NA)) +
  theme_cowplot() +
  background_grid() +
  labs(title = 'Size Factors', x = 'Sample Rank', y = 'Size Factors')

# Size factor against the total read count of each sample. The x aesthetic is
# read_count (not counted_frac), so title and axis label say "Read Counts".
p_sf_cov <- ggplot(coverage_dt, aes(read_count, size_factors)) +
  geom_point() +
  ylim(c(0,NA)) +
  theme_cowplot() +
  background_grid() +
  labs(title = 'Size Factors vs. Read Counts',
       x = 'Read Counts', y = 'Size Factors')

#+ sizeFactors, fig.height=6, fig.width=12
plot_grid(p_sf, p_sf_cov)

#' # Filtering
# Quantile of the raw counts per gene that is compared against the
# min_1 / min_10 thresholds below.
quant <- .95
# Computed once and reused for both thresholds.
row_quant <- rowQuantiles(cnts_mtx, probs = quant)
# One count matrix per filter. drop = FALSE keeps every subset a matrix even
# when a single gene passes, so rownames() and rowMeans() below still work.
filter_mtx <- list(
  all = cnts_mtx,
  passed_FPKM = cnts_mtx[rowData(ods)$passedFilter, , drop = FALSE],
  min_1 = cnts_mtx[row_quant > 1, , drop = FALSE],
  min_10 = cnts_mtx[row_quant > 10, , drop = FALSE]
)
# Long table with the mean count of every gene under every filter. The column
# is named mean_counts because it holds rowMeans(), matching the axis labels.
filter_dt <- rbindlist(lapply(names(filter_mtx), function(filter_name) {
  mtx <- filter_mtx[[filter_name]]
  data.table(gene_ID = rownames(mtx), mean_counts = rowMeans(mtx), filter = filter_name)
}))
filter_dt[, filter := factor(filter, levels = c('all', 'passed_FPKM', 'min_1', 'min_10'))]

binwidth <- .2
# Histogram of mean counts, one facet per filter. The legend is hidden, so no
# guide is configured (the plot maps `fill` only, not `col`).
p_hist <- ggplot(filter_dt, aes(x = mean_counts, fill = filter)) +
  geom_histogram(binwidth = binwidth) +
  scale_x_log10() +
  facet_wrap(.~filter) +
  labs(x = "Mean counts per gene", y = "Frequency", title = 'Mean Count Distribution') +
  scale_fill_brewer(palette = "Paired") +
  theme_cowplot() +
  theme(legend.position = "none")

# Density of mean counts with all filters overlaid; the density is scaled by
# binwidth * count so its y axis is comparable to the histogram's.
p_dens <- ggplot(filter_dt, aes(x = mean_counts, col = filter)) +
  geom_density(aes(y=binwidth * ..count..), size = 1.2) +
  scale_x_log10() +
  labs(x = "Mean counts per gene", y = "Frequency") +
  guides(col = guide_legend(title = NULL)) +
  scale_color_brewer(palette = "Paired") +
  theme_cowplot() +
  theme(legend.position = "top",
        legend.justification="center",
        legend.background = element_rect(color = NA))

#+ meanCounts, fig.height=6, fig.width=12
plot_grid(p_hist, p_dens)

#+ expressedGenes, fig.height=6, fig.width=8
plotExpressedGenes(ods) +
  theme_cowplot() +
  background_grid(major = "y")

# Per-sample expressed-gene statistics, taken from the column data of ods and
# reduced to the columns reported in the text below.
stat_cols <- c("expressedGenes", "unionExpressedGenes",
               "intersectionExpressedGenes", "passedFilterGenes",
               "expressedGenesRank")
expressed_genes <- as.data.table(colData(ods))
expressed_genes <- expressed_genes[, stat_cols, with = FALSE]

#+echo=F
# Row of the sample with rank 1.
rank_1 <- expressed_genes[expressedGenesRank == 1]
#' **Rank 1:**
#' `r as.character(rank_1$expressedGenes)` expressed genes
#+echo=F
# Row of the sample whose rank equals the number of samples.
rank_n <- expressed_genes[expressedGenesRank == nrow(expressed_genes)]
#' **Rank `r rank_n$expressedGenesRank`:**
#' `r as.character(rank_n$expressedGenes)` expressed genes
#' `r as.character(rank_n$unionExpressedGenes)` expressed genes (union)
#' `r as.character(rank_n$intersectionExpressedGenes)` expressed genes (intersection)
#' `r as.character(rank_n$passedFilterGenes)` genes passed the filter
Loading

0 comments on commit d254aca

Please sign in to comment.