diff --git a/Added_features.txt b/Added_features.txt new file mode 100644 index 000000000..c590a81fa --- /dev/null +++ b/Added_features.txt @@ -0,0 +1,35 @@ +Added files: + +containers: +- trimmomatic.def +- STAR.def +- featureCounts.def +- drop.def + +/rna_resources: +- outrider.pm + +/scripts: +- createMatrix.R +- outrider.R +- setResults.R + +/modules/vcf/: +- outrider.nf +- featureCounts.nf + +/modules/fastq/: +- trimmomatic.nf +- STAR.nf + +rna_install.sh + +Changes in original code: +vip_vcf.nf: lines 31-68, 353-356 +vip_cram.nf: lines 123-126 +vip_fastq.nf: lines 20-62, 153-167 +nxf.config: lines 48-53 +nxf_vcf.config: lines 42-51 +nxf_fastq.config: lines 14-24 +modules/utils.nf: lines 5-8 +modules/vcf/utils.nf: lines 109-114 diff --git a/Outrider_optimalization/Autoencoder_opt_notebook.html b/Outrider_optimalization/Autoencoder_opt_notebook.html new file mode 100644 index 000000000..d0c5294c5 --- /dev/null +++ b/Outrider_optimalization/Autoencoder_opt_notebook.html @@ -0,0 +1,23084 @@ + + +
+ + +library("OUTRIDER")
+
Loading required package: BiocParallel + +Loading required package: GenomicFeatures + +Loading required package: BiocGenerics + + +Attaching package: ‘BiocGenerics’ + + +The following objects are masked from ‘package:stats’: + + IQR, mad, sd, var, xtabs + + +The following objects are masked from ‘package:base’: + + anyDuplicated, aperm, append, as.data.frame, basename, cbind, + colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, + get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, + match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, + Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, + table, tapply, union, unique, unsplit, which.max, which.min + + +Loading required package: S4Vectors + +Loading required package: stats4 + + +Attaching package: ‘S4Vectors’ + + +The following object is masked from ‘package:utils’: + + findMatches + + +The following objects are masked from ‘package:base’: + + expand.grid, I, unname + + +Loading required package: IRanges + +Loading required package: GenomeInfoDb + +Loading required package: GenomicRanges + +Loading required package: AnnotationDbi + +Loading required package: Biobase + +Welcome to Bioconductor + + Vignettes contain introductory material; view with + 'browseVignettes()'. To cite Bioconductor, see + 'citation("Biobase")', and for packages 'citation("pkgname")'. 
+ + +Loading required package: SummarizedExperiment + +Loading required package: MatrixGenerics + +Loading required package: matrixStats + + +Attaching package: ‘matrixStats’ + + +The following objects are masked from ‘package:Biobase’: + + anyMissing, rowMedians + + + +Attaching package: ‘MatrixGenerics’ + + +The following objects are masked from ‘package:matrixStats’: + + colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, + colCounts, colCummaxs, colCummins, colCumprods, colCumsums, + colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, + colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats, + colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, + colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads, + colWeightedMeans, colWeightedMedians, colWeightedSds, + colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, + rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods, + rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, + rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins, + rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, + rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars, + rowWeightedMads, rowWeightedMeans, rowWeightedMedians, + rowWeightedSds, rowWeightedVars + + +The following object is masked from ‘package:Biobase’: + + rowMedians + + +Loading required package: data.table + + +Attaching package: ‘data.table’ + + +The following object is masked from ‘package:SummarizedExperiment’: + + shift + + +The following object is masked from ‘package:GenomicRanges’: + + shift + + +The following object is masked from ‘package:IRanges’: + + shift + + +The following objects are masked from ‘package:S4Vectors’: + + first, second + + ++
# Load the three saved datasets from their RDS files:
#   set_50      - the 50-sample dataset
#   all_non_opt - the full-size dataset without Q optimization
#   all_opt_q   - the full-size dataset with Q dimension optimization
# NOTE(review): the paths are absolute and site-specific; presumably each file
# holds a fitted OUTRIDER dataset - confirm.
+set_50 <- readRDS("/groups/umcg-gdio/tmp01/umcg-rheins-kars/conda_envs/outrider_50_q_fs.rds")
+all_non_opt <- readRDS("/groups/umcg-gdio/tmp01/umcg-rheins-kars/conda_envs/outrider_nonopt_q_fs.rds")
+all_opt_q <- readRDS("/groups/umcg-gdio/tmp01/umcg-rheins-kars/conda_envs/outrider_opt_q_fs.rds")
+
# Create one results table per dataset by calling results() on each loaded object.
# NOTE(review): results() is called with its default arguments, which presumably
# keep only significant outliers - confirm against the OUTRIDER documentation.
+res_50 <- results(set_50)
+res_all_nonOptQ <- results(all_non_opt)
+res_all_optQ <- results(all_opt_q)
+
+
# Remove control samples from the full-size result tables. set_50 only contains
# the Sjogren set, so its sample IDs are used as the filter.
# The IDs are taken from the dataset itself (colnames(set_50)) rather than from
# res_50$sampleID: with default arguments results() reports only significant
# outliers, so a Sjogren sample without a hit in the 50-sample run would be
# missing from the filter and wrongly dropped from the full-size tables too.
+samples <- colnames(set_50)
+
# Filter sets
# dplyr is attached here; the filtering below uses base subsetting with %in%
# and does not depend on it.
+library(dplyr)
+res_all_nonOptQ <- res_all_nonOptQ[res_all_nonOptQ$sampleID %in% samples,]
+res_all_optQ <- res_all_optQ[res_all_optQ$sampleID %in% samples,]
+
+Attaching package: ‘dplyr’ + + +The following objects are masked from ‘package:data.table’: + + between, first, last + + +The following object is masked from ‘package:matrixStats’: + + count + + +The following object is masked from ‘package:AnnotationDbi’: + + select + + +The following object is masked from ‘package:Biobase’: + + combine + + +The following objects are masked from ‘package:GenomicRanges’: + + intersect, setdiff, union + + +The following object is masked from ‘package:GenomeInfoDb’: + + intersect + + +The following objects are masked from ‘package:IRanges’: + + collapse, desc, intersect, setdiff, slice, union + + +The following objects are masked from ‘package:S4Vectors’: + + first, intersect, rename, setdiff, setequal, union + + +The following objects are masked from ‘package:BiocGenerics’: + + combine, intersect, setdiff, union + + +The following objects are masked from ‘package:stats’: + + filter, lag + + +The following objects are masked from ‘package:base’: + + intersect, setdiff, setequal, union + + ++
# Print the dimensions (rows, columns) of each results table, to compare the
# number of results per dataset.
+dim(res_50)
+dim(res_all_nonOptQ)
+dim(res_all_optQ)
+
+
# Plot count-correlation heatmaps for the 50-sample dataset,
# once with normalized=FALSE and once with normalized=TRUE.
+plotCountCorHeatmap(set_50, normalized=FALSE)
+plotCountCorHeatmap(set_50, normalized=TRUE)
+
# Plot count-correlation heatmaps for the full-size dataset without Q optimization,
# once with normalized=FALSE and once with normalized=TRUE.
+plotCountCorHeatmap(all_non_opt, normalized=FALSE)
+plotCountCorHeatmap(all_non_opt, normalized=TRUE)
+
# Plot count-correlation heatmaps for the full-size dataset with Q dimension optimization,
# once with normalized=FALSE and once with normalized=TRUE.
+plotCountCorHeatmap(all_opt_q, normalized=FALSE)
+plotCountCorHeatmap(all_opt_q, normalized=TRUE)
+