Skip to content

Commit

Permalink
Merge pull request #27 from Gerbenvandervries/master
Browse files Browse the repository at this point in the history
update RNA pipeline
  • Loading branch information
RoanKanninga authored Mar 4, 2021
2 parents 6990ad5 + ff0c3f1 commit e4f5b74
Show file tree
Hide file tree
Showing 26 changed files with 514 additions and 317 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# NGS_RNA

https://github.com/gatk-workflows/gatk4-rnaseq-germline-snps-indels/blob/master/gatk4-rna-best-practices.wdl
28 changes: 16 additions & 12 deletions parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ checkStage,module list
jobname,jobname

##### Tools and versions #####
jdkVersion,Java/1.7.0_80
jdkVersion,Java/8-LTS
NGSUtilsVersion,ngs-utils/19.03.3-GCCcore-7.3.0
NGSRNAVersion,NGS_RNA/4.0.0
fastqcVersion,FastQC/0.11.8-Java-11-LTS
Expand All @@ -16,6 +16,7 @@ picardVersion,picard/2.20.5-Java-11-LTS
htseqVersion,HTSeq/0.11.0-GCCcore-7.3.0-Python-3.7.4
hisatVersion,hisat2/2.1.0-${toolchain}
pythonVersion,Python/3.7.4-GCCcore-7.3.0-bare
python2Version,Python/2.7.16-GCCcore-7.3.0-bare
PythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1
gatkVersion,GATK/4.1.4.1-Java-8-LTS
ghostscriptVersion,Ghostscript/9.27-GCCcore-7.3.0
Expand All @@ -26,11 +27,13 @@ starVersion,STAR/2.7.3a-${toolchain}
kallistoVersion,Kallisto/0.43.0-${toolchain}
BBMapVersion,BBMap/35.69-Java-1.7.0_80
picardJar,picard.jar
gatkJar,gatk-package-4.1.4.1-local.jar
mergeSamFilesJar,MergeSamFiles
processReadCountsJar,ProcessReadCounts.jar
TrimGaloreVersion,TrimGalore/0.4.5-GCCcore-7.3.0-Python-3.7.4-bare
cutadaptVersion,cutadapt/2.6-GCCcore-7.3.0-Python-3.7.4-bare
RSeQCVersion,RSeQC/3.0.1-GCCcore-7.3.0-Python-3.7.4
leafcutterVersion,leafcutter/aa12b1e-foss-2018b

##### GENERAL DIRECTORIES #####
tmpDataDir,${workDir}/${tmpName}/
Expand Down Expand Up @@ -61,17 +64,17 @@ leftbarcodefqgz,${leftbarcodefq}.gz
rightbarcodefqgz,${rightbarcodefq}.gz
trimmedLeftBarcodeFqGz,${projectRawtmpDataDir}/${filePrefix}_${barcode}_1_val_1.fq.gz
trimmedRightBarcodeFqGz,${projectRawtmpDataDir}/${filePrefix}_${barcode}_2_val_2.fq.gz
trimmedSingleBarcodeFqGz,${projectRawtmpDataDir}/${filePrefix}_${barcode}_trimmed.fq.gz
workflowFile,$MC_HOME/NGS_RNA_seq_pipeline/workflow.csv

##### GENOME,INDEX,ANNOTATION FILES #####
ensembleDir,${dataDir}/Ensembl/GrCh37.75/pub/release-${ensembleReleaseVersion}/gtf/${speciesFileName}/
geneAnnotationTxt,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.annotation.geneIds.txt.gz
annotationGtf,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.gtf
annotationRefFlat,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.gtf.annotation.refFlat
annotationIntervalList,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.rrna.interval_list
starIndex,${dataDir}/ftp.broadinstitute.org/bundle/2.8/${genome}/${starVersion}/
indexFile,${indexSpecies}
dbsnpVcf,${dbSNPDir}${dbSNPFileID}.vcf
starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.3a-foss-2018b/

##### PREFIXES,POSTFIXES #####
rawFileExt,fq.gz
Expand Down Expand Up @@ -108,26 +111,27 @@ addOrReplaceGroupsBam,${intermediateDir}/${filePrefix}_${barcode}.rg.sorted.bam
addOrReplaceGroupsBai,${intermediateDir}/${filePrefix}_${barcode}.rg.sorted.bai
sampleMergedBam,${intermediateDir}/${externalSampleID}.sorted.merged.bam
sampleMergedBai,${intermediateDir}/${externalSampleID}.sorted.merged.bai
sampleMergedBamExt,sorted.merged.bam
sampleMergedDedupBam,${intermediateDir}/${externalSampleID}.sorted.merged.dedup.bam
sampleMergedDedupBai,${intermediateDir}/${externalSampleID}.sorted.merged.dedup.bai
starLogFile,${intermediateDir}/${externalSampleID}.hisat.final.log
fragmentLength,200

##### Protocols 4,5a,5b,5c (SplitAndTrim,GatkHaplotypeCallerGvcf,GatkMergeGvcf,GatkGenotypeGvcf) #####
splitAndTrimBam,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.bam
splitAndTrimBai,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.bai
splitAndTrimShortBam,${externalSampleID}.sorted.merged.dedup.splitAndTrim.bam
splitAndTrimShortBai,${externalSampleID}.sorted.merged.dedup.splitAndTrim.bai
IndelRealignedBam,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.realigned.bam
IndelRealignedBai,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.realigned.bai
bqsrBeforeGrp,${intermediateDir}${externalSampleID}.before.grp
bqsrBam,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.realigned.bqsr.bam
bqsrBai,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.realigned.bqsr.bai
GatkHaplotypeCallerGvcf,${intermediateDir}${externalSampleID}.GatkHaplotypeCallerGvcf.g.vcf
bqsrBam,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.bqsr.bam
bqsrBai,${intermediateDir}${externalSampleID}.sorted.merged.dedup.splitAndTrim.bqsr.bai
GatkHaplotypeCallerGvcf,${intermediateDir}${externalSampleID}.GatkHaplotypeCallerGvcf.g.vcf.gz
GatkHaplotypeCallerGvcfidx,${intermediateDir}${externalSampleID}.GatkHaplotypeCallerGvcf.g.vcf.idx
GatkMergeGvcf,${intermediateDir}${externalSampleID}.MergeGvcf.g.vcf
GatkMergeGvcfidx,${intermediateDir}.MergeGvcf.g.vcf.idx
projectBatchGenotypedVariantCalls,${projectPrefix}.variant.calls.genotyped.chr${chr}.vcf
projectBatchCombinedVariantCalls,${projectPrefix}.variant.calls.combined.chr${chr}.g.vcf

projectBatchGenotypedVariantCalls,${projectPrefix}.variant.calls.genotyped.vcf
projectBatchCombinedVariantCalls,${projectPrefix}.variant.calls.combined.g.vcf.gz
##### Protocols 2,7 (QCStats, QC_Report) #####
collectMultipleMetricsPrefix,${intermediateDir}${externalSampleID}
flagstatMetrics,${intermediateDir}${externalSampleID}.flagstat
Expand All @@ -145,5 +149,5 @@ recreateinsertsizepdfR,createInsertSizePlot.R
qcMatrics,${intermediateDir}/${externalSampleID}.total.qc.metrics.table

##### Protocols 3,6 (HTSeq count, MakeExpressionTable) #####
sampleHTseqExpressionText,${intermediateDir}/${externalSampleID}.htseq.txt
projectHTseqExpressionTable,${intermediateDir}/${project}.expression.genelevel.v${ensembleReleaseVersion}.htseq.txt.table
sampleHTseqExpressionText,${intermediateDir}/${externalSampleID}.counts.txt
projectHTseqExpressionTable,${intermediateDir}/${project}.expression.genelevel.v${ensembleReleaseVersion}.counts.table
2 changes: 0 additions & 2 deletions parameters.hisat.csv
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ geneAnnotationTxt,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}
annotationGtf,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.gtf
annotationRefFlat,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.gtf.annotation.refFlat
annotationIntervalList,${ensembleDir}/${annotationFileName}.${ensembleReleaseVersion}.rrna.interval_list
starIndex,${dataDir}/ftp.broadinstitute.org/bundle/2.8/${genome}/${starVersion}/
indexFile,${indexSpecies}
dbsnpVcf,${dbSNPDir}${dbSNPFileID}.vcf

##### PREFIXES,POSTFIXES #####
Expand Down
11 changes: 6 additions & 5 deletions parameters.homo_sapiens.csv
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
#### GENOME VARIABLES ####
annotationFileName,Homo_sapiens.${GR}
speciesFileName,homo_sapiens
indexFileID,human_g1k_v37
indexFileID,Homo_sapiens.GRCh37.dna.primary_assembly.fa
indexFolderName,human_g1k_v37
dbSNPFileID,dbsnp_138.${genome}
dbSNPFileID,dbsnp_137.${genome}
indicesDir,${dataDir}/ftp.broadinstitute.org/bundle/2.8/${genome}/
dbSNPDir,${dataDir}/dbSNP/
hisatIndex,${dataDir}/ftp.broadinstitute.org/bundle/2.8/${genome}/hisat/0.1.5-beta-goolf-1.7.20/${indexFolderName}
kallistoIndex,${dataDir}/ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/Homo_sapiens.GRCh37.75.cdna.all.fa.idx
indexFileFastaIndex,${indexSpecies}.fai
indexSpecies,${indicesDir}/${indexFileID}.fasta
indexSpecies,${dataDir}/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/${indexFileID}
indexChrIntervalList,${indicesDir}/${indexFileID}.chr${chr}.interval_list
indexFileDictionary,${indexSpecies}.dict
MillsAnd1000GGoldGtandard,/apps/data/1000G/phase1/Mills_and_1000G_gold_standard
MillsAnd1000GGoldGtandard,${dataDir}/1000G/phase1/Mills_and_1000G_gold_standard
indelRealignmentTargets,${MillsAnd1000GGoldGtandard}/1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.intervals
oneKgPhase1IndelsVcf,${MillsAnd1000GGoldGtandard}/1000G_phase1.indels.b37.vcf
bed12,${dataDir}/Ensembl/GrCh37.75/pub/release-75/bed/${speciesFileName}/${annotationFileName}.75.bed12
annotationFile,${dataDir}/Ensembl/GrCh37.75/pub/release-75/gtf/${speciesFileName}/${annotationFileName}.75_nodupes_genid.txt
goldStandardVcf,${MillsAnd1000GGoldGtandard}/1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.vcf
ensembleDir,${dataDir}/Ensembl/GrCh37.75/pub/release-${ensembleReleaseVersion}/gtf/${speciesFileName}/
2 changes: 1 addition & 1 deletion protocols/AddOrReplaceReadGroups.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ tmpAddOrReplaceGroupsBai="${MC_tmpFile}"
module load "${picardVersion}"

#check modules
"${checkStage}"
${checkStage}

echo "## $(date) Start $0"

Expand Down
47 changes: 23 additions & 24 deletions protocols/BQSR.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#string externalSampleID
#string bqsrBam
#string bqsrBai
#string IndelRealignedBam
#string IndelRealignedBai
#string splitAndTrimBam
#string splitAndTrimBai
#string indelRealignmentTargets
#string oneKgPhase1IndelsVcf
#string goldStandardVcf
Expand Down Expand Up @@ -42,31 +42,30 @@ echo
echo "Running GATK BQSR:"


java -Xmx14g -XX:ParallelGCThreads=8 -Djava.io.tmpdir=${tmpTmpDataDir} -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T BaseRecalibrator\
-R ${indexFile} \
-I ${IndelRealignedBam} \
-o ${bqsrBeforeGrp} \
-knownSites ${dbsnpVcf} \
-knownSites ${goldStandardVcf} \
-knownSites ${oneKgPhase1IndelsVcf} \
-nct 2
java -Dsamjdk.use_async_io_read_samtools=false \
-Dsamjdk.use_async_io_write_samtools=true \
-Dsamjdk.use_async_io_write_tribble=false \
-Dsamjdk.compression_level=2 \
-jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \
"${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" BaseRecalibrator \
-R "${indexFile}" \
-I "${splitAndTrimBam}" \
-O "${bqsrBeforeGrp}" \
--known-sites "${dbsnpVcf}"

java -Xmx14g -XX:ParallelGCThreads=8 -Djava.io.tmpdir=${tmpTmpDataDir} -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T PrintReads \
-R ${indexFile} \
-I ${IndelRealignedBam} \
-o ${tmpBqsrBam} \
-BQSR ${bqsrBeforeGrp} \
-nct 2
java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \
"${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" ApplyBQSR \
-R "${indexFile}" \
-I "${splitAndTrimBam}" \
-O "${tmpBqsrBam}" \
--bqsr-recal-file "${bqsrBeforeGrp}"

mv "${tmpBqsrBam}" "${bqsrBam}"
mv "${tmpBqsrBai}" "${bqsrBai}"

mv ${tmpBqsrBam} ${bqsrBam}
mv ${tmpBqsrBai} ${bqsrBai}

cd ${intermediateDir}
md5sum $(basename ${bqsrBam})> $(basename ${bqsrBam}).md5sum
md5sum $(basename ${bqsrBai})> $(basename ${bqsrBai}).md5sum
cd "${intermediateDir}"
md5sum $(basename "${bqsrBam}")> $(basename "${bqsrBam}").md5
md5sum $(basename "${bqsrBai}")> $(basename "${bqsrBai}").md5
cd -

echo "returncode: $?";
Expand Down
Loading

0 comments on commit e4f5b74

Please sign in to comment.