From b59bc7369056261726dec559eee7585b54692084 Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Mon, 19 Jun 2023 20:50:58 +0200 Subject: [PATCH 01/42] Improved indentation check. --- check/indentationcheck.sh | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/check/indentationcheck.sh b/check/indentationcheck.sh index 76d9536..61af252 100755 --- a/check/indentationcheck.sh +++ b/check/indentationcheck.sh @@ -1,19 +1,11 @@ #!/bin/bash -# -# Use either WORKSPACE location for Jenkins or -# resolve path to scripts we need to check -# when executed manually outside Jenkins. -# -if [[ -z "${WORKSPACE:-}" ]]; then - MYDIR="$(cd -P "$(dirname "${0}")" && pwd)" - WORKSPACE="$(dirname ${MYDIR})" -fi +MYDIR="$(cd -P "$(dirname "${0}")" && pwd)" echo '#####################################################################################' echo ' Bash code must be indented with TABs. Checking for lines indented with spaces ... ' echo '#####################################################################################' -grep -n '^[[:space:]]* [[:space:]]*' "${WORKSPACE:-../}/protocols/"*.sh +grep --recursive --line-number --include='*.*sh' --exclude-dir='deprecated' '^[[:space:]]* [[:space:]]*' "${MYDIR}"/../ grep_exit_status="${?}" if [[ "${grep_exit_status}" -eq 0 ]]; then echo '#####################################################################################' From 8baa99c01c3e170110406ef0caf4bdea1fb4edb6 Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Mon, 19 Jun 2023 21:01:47 +0200 Subject: [PATCH 02/42] Added .gitattributes and .gitignore --- .gitattributes | 6 ++++++ .gitignore | 5 +++++ 2 files changed, 11 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b161dc8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +* text=auto + +*.sh eol=lf +*.bash eol=lf +*.md eol=lf +*.cfg eol=lf diff --git a/.gitignore b/.gitignore new 
file mode 100644 index 0000000..4acf491 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.DS_Store +*.project +*.pydevproject +*.settings +*.md.html \ No newline at end of file From a89528e3454e78b1a48c215cfdb55c907e0fba06 Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Mon, 19 Jun 2023 21:02:05 +0200 Subject: [PATCH 03/42] Deleted .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index b88e62a16ec5834473e782f6419d82132dd27833..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOHRW;41IC5J5pr*=NVKdHxhJin3(^wq(B}jj~72a_` zk0Z6Eb{J0E^E6yyp!&PLfn=a&;M}Kct^c>wnf|{{%2_gy4E!kuWU;;5 zZum=4TPHuKwYE_2sV8Bqh0|CmCR!=xL`(7YkGP^^t~Y_Bqovbt>BM{pC@(3=z;7_{ E1q2l+mjD0& From c2c0fb25f1cd5de41fb4a5e883646b2574f3a543 Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Mon, 19 Jun 2023 21:03:28 +0200 Subject: [PATCH 04/42] Fixed indentation and some shellcheck issues in scripts. 
--- checkEnvironment.sh | 20 ++++++++++---------- templates/generate_template.sh | 4 ++-- test/autotest_generate_template.sh | 6 +++--- test/protocols/Autotest.sh | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/checkEnvironment.sh b/checkEnvironment.sh index 1d85f64..f05d6d2 100755 --- a/checkEnvironment.sh +++ b/checkEnvironment.sh @@ -1,6 +1,6 @@ HOST=$1 -if [ -f ./environment_checks.txt ] +if [[ -f ./environment_checks.txt ]] then rm ./environment_checks.txt fi @@ -10,29 +10,29 @@ TMPDIR="" GROUP="" if [ "${HOST}" == "zinc-finger.gcc.rug.nl" ] then - ENVIRONMENT_PARAMETERS="zinc-finger" - TMPDIR="tmp05" + ENVIRONMENT_PARAMETERS="zinc-finger" + TMPDIR="tmp05" elif [ "${HOST}" == "leucine-zipper.gcc.rug.nl" ] then - ENVIRONMENT_PARAMETERS="leucine-zipper" - TMPDIR="tmp06" + ENVIRONMENT_PARAMETERS="leucine-zipper" + TMPDIR="tmp06" elif [ "${HOST}" == "calculon" ] then - ENVIRONMENT_PARAMETERS="calculon" - TMPDIR="tmp04" + ENVIRONMENT_PARAMETERS="calculon" + TMPDIR="tmp04" else - echo "unknown host: running is only possible on calculon,zinc-finger or leucine-zipper" + echo "Unknown host: running is only possible on calculon, zinc-finger or leucine-zipper." fi THISDIR=$(pwd) if [[ $THISDIR == *"/groups/umcg-gaf/"* ]] then GROUP="umcg-gaf" -elif [[ $THISDIR == *"/groups/umcg-gd/"* ]] +elif [[ $THISDIR == *"/groups/umcg-gd/"* ]] then GROUP="umcg-gd" else - echo "this is not a known group, please run only in umcg-gd or umcg-gaf group" + echo "This is not a known group. Please run only in umcg-gd or umcg-gaf group." 
fi printf "${ENVIRONMENT_PARAMETERS}\t${TMPDIR}\t${GROUP}" > ./environment_checks.txt diff --git a/templates/generate_template.sh b/templates/generate_template.sh index 7202b24..9bd28b6 100755 --- a/templates/generate_template.sh +++ b/templates/generate_template.sh @@ -5,7 +5,7 @@ then echo "RNA pipeline loaded, proceeding" else echo "No RNA pipeline loaded, exiting" - exit 1 + exit 1 fi module list @@ -58,7 +58,7 @@ workflow="${EBROOTNGS_RNA}/workflow_${pipeline}.csv" if [ -f .compute.properties ]; then - rm .compute.properties + rm .compute.properties fi perl "${EBROOTNGS_RNA}/convertParametersGitToMolgenis.pl" "${EBROOTNGS_RNA}/parameters.${species}.${build}.csv" > \ diff --git a/test/autotest_generate_template.sh b/test/autotest_generate_template.sh index c8f714c..74f74f1 100755 --- a/test/autotest_generate_template.sh +++ b/test/autotest_generate_template.sh @@ -20,12 +20,12 @@ PIPELINE="hisat" # hisat, lexogen WORKFLOW=${EBROOTNGS_RNA}/test_workflow_${PIPELINE}.csv -if [ -f .compute.properties ]; +if [[ -f .compute.properties ]] then - rm .compute.properties + rm .compute.properties fi -if [ -f ${GAF}/generatedscripts/${PROJECT}/out.csv ]; +if [[ -f ${GAF}/generatedscripts/${PROJECT}/out.csv ]] then rm -rf ${GAF}/generatedscripts/${PROJECT}/out.csv fi diff --git a/test/protocols/Autotest.sh b/test/protocols/Autotest.sh index 3fb723e..d112f82 100755 --- a/test/protocols/Autotest.sh +++ b/test/protocols/Autotest.sh @@ -29,11 +29,11 @@ done if [[ -f "${testResults}/output_NGS_RNA/notInVcf1.txt" || -f "${testResults}/output_NGS_RNA/notInVcf2.txt" || -f "${testResults}/output_NGS_RNA/inconsistent.txt" || -f "${testResults}/output_NGS_RNA/"*.fail ]] then - echo "there are differences between the test and the original output" - echo "please fix the bug or update this test" - echo "the stats can be found here: ${testResults}/output_NGS_RNA/vcfStats.txt" - exit 1 + echo "There are differences between the test and the original output." 
+ echo "Please fix the bug or update this test." + echo "The stats can be found here: ${testResults}/output_NGS_RNA/vcfStats.txt" + exit 1 else - echo "test succeeded" + echo "Test succeeded." head -2 "${testResults}/output_NGS_RNA/vcfStats.txt" fi From 2aa4be79b1fd0d68e49dd6d41da380e100aef18a Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 5 Jan 2023 14:11:05 +0000 Subject: [PATCH 05/42] typo --- test/test_pipeline.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_pipeline.sh b/test/test_pipeline.sh index 59745e8..982e9c2 100644 --- a/test/test_pipeline.sh +++ b/test/test_pipeline.sh @@ -6,7 +6,6 @@ function preparePipeline(){ local _projectName="PlatinumSubset_NGS_RNA" local _generatedScriptsFolder="${workfolder}/generatedscripts/${_projectName}" - TMPHOME=/home/umcg-gvdvries/git/NGS_RNA rm -f "${workfolder}/logs/${_projectName}/run01.pipeline.finished" rsync -r --verbose --recursive --links --no-perms --times --group --no-owner --devices --specials "${pipelinefolder}/test/rawdata/MY_TEST_BAM_PROJECT/"SRR1552906[249]_[12].fq.gz "${workfolder}/rawdata/ngs/MY_TEST_BAM_PROJECT/" @@ -56,7 +55,7 @@ function checkIfFinished(){ local _projectName="PlatinumSubset_NGS_RNA" count=0 minutes=0 - while [ ! -f "${workfolder}/projects/${_projectName}/run01/jobs/s15_Autotestt_0.sh.finished" ] + while [ ! -f "${workfolder}/projects/${_projectName}/run01/jobs/s15_Autotest_0.sh.finished" ] do echo "${_projectName} is not finished in $minutes minutes, sleeping for 2 minutes" From 0d7a128d9e298b3cc276720f9f6a7984edc1390a Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 5 Jan 2023 14:58:53 +0000 Subject: [PATCH 06/42] trigger --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 60a168b..6744355 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

NGS_RNA pipeline

+

NGS_RNA pipeline

Description of the different steps used in the RNA analysis pipeline

From 68070f82968eac0819564edb8b2e5a14630a44e4 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 5 Jan 2023 15:24:38 +0000 Subject: [PATCH 07/42] move resources --- parameters.homo_sapiens.GRCh37.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parameters.homo_sapiens.GRCh37.csv b/parameters.homo_sapiens.GRCh37.csv index 557c216..b3b4dab 100755 --- a/parameters.homo_sapiens.GRCh37.csv +++ b/parameters.homo_sapiens.GRCh37.csv @@ -150,7 +150,7 @@ qcMatrics,${intermediateDir}/${externalSampleID}.total.qc.metrics.table ##### Protocols 3,6 (HTSeq count, MakeExpressionTable) ##### sampleHTseqExpressionText,${intermediateDir}/${externalSampleID}.counts.txt projectHTseqExpressionTable,${intermediateDir}/${project}.expression.counts.table -rnaSeQCGTF,/groups/umcg-solve-rd/tmp01/resources/GAD/gtf/gencode.v19.annotation.patched_contigs.genes.gtf +rnaSeQCGTF,/apps/data/GAD/gtf/gencode.v19.annotation.patched_contigs.genes.gtf rnaSeQCDir,${projectResultsDir}/RNASeQC ##### GENOME,INDEX,ANNOTATION FILES ##### From be5189341d03e2654f7b1e625ceb5cd5bd589572 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 6 Jan 2023 09:03:23 +0000 Subject: [PATCH 08/42] fix yaml config file fom MutliQC --- protocols/MultiQCReport.sh | 92 +++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/protocols/MultiQCReport.sh b/protocols/MultiQCReport.sh index 65e6ccd..37fdd48 100755 --- a/protocols/MultiQCReport.sh +++ b/protocols/MultiQCReport.sh @@ -38,52 +38,52 @@ cat > "${intermediateDir}/${project}_QC_config.yaml" <<'_EOF' report_header_info: - - Contact E-mail: '${contact}' - - Pipeline Version: '${ngsVersion}' - - Project : '${project}' - - prepKit : '${prepKit}' - - '' : '' - - Used toolversions: ' ' - - '' : '' - - '': ${jdkVersion} - - '': ${fastqcVersion} - - '': ${starVersion} - - '': ${samtoolsVersion} - - '': ${rVersion} - - '': ${wkhtmltopdfVersion} - - '': ${picardVersion} - - '': ${htseqVersion} - - '': 
${pythonVersion} - - '': ${gatkVersion} - - '': ${multiqcVersion} - - '' : '' - - pipeline description : '' - - Gene expression quantification : '' - - '': 'The trimmed fastQ files where aligned to build ${indexFileID} reference genome using' - - '': '${starVersion} [1] allowing for 2 mismatches. Before gene quantification' - - '': '${samtoolsVersion} [2] was used to sort the aligned reads.' - - '': 'The gene level quantification was performed by ${htseqVersion} [3] using --mode=union' - - '': '--stranded=no and, Ensembl version 75 was used as gene annotation database which is included' - - '': 'in folder expression/.' - - '' : '' - - QC metrics: '' - - '': 'Quality control (QC) metrics are calculated for the raw sequencing data. This is done using' - - '': 'the tool FastQC FastQC [4]. QC metrics are calculated for the aligned reads using' - - '': 'Picard-tools [5] CollectRnaSeqMetrics, MarkDuplicates, CollectInsertSize-' - - '': 'Metrics and SAMtools flagstat.These QC metrics form the basis in this final QC report.' - - '' : '' - - references: '' - - '' : '' - - '': '1. Dobin A, Davis C a, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M,' - - '': 'Gingeras TR: STAR: ultrafast universal RNA-seq aligner. Bioinformatics 2013, 29:15–21.' - - '': '2. Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R,' - - '': 'Subgroup 1000 Genome Project Data Processing: The Sequence Alignment/Map format and SAMtools.' - - '': 'Bioinforma 2009, 25 (16):2078–2079.' - - '': '3. Anders S, Pyl PT, Huber W: HTSeq – A Python framework to work with high-throughput sequencing data' - - '': 'HTSeq – A Python framework to work with high-throughput sequencing data. 2014:0–5.' - - '': '4. Andrews, S. (2010). FastQC a Quality Control Tool for High Throughput Sequence Data [Online].' - - '': 'Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ${samtoolsVersion}' - - '': '5. Picard Sourceforge Web site. 
http://picard.sourceforge.net/ ${picardVersion}' + - 'Contact E-mail' : '${contact}' + - 'Pipeline Version' : '${ngsVersion}' + - 'Project' : '${project}' + - 'prepKit' : '${prepKit}' + - '' : '' + - 'Used toolversions' : ' ' + - '' : '' + - '' : ${jdkVersion} + - '' : ${fastqcVersion} + - '' : ${starVersion} + - '' : ${samtoolsVersion} + - '' : ${rVersion} + - '' : ${wkhtmltopdfVersion} + - '' : ${picardVersion} + - '' : ${htseqVersion} + - '' : ${pythonVersion} + - '' : ${gatkVersion} + - '' : ${multiqcVersion} + - '' : '' + - 'pipeline description' : '' + - 'Gene expression quantification' : '' + - '' : 'The trimmed fastQ files where aligned to build ${indexFileID} reference genome using' + - '' : '${starVersion} [1] allowing for 2 mismatches. Before gene quantification' + - '' : '${samtoolsVersion} [2] was used to sort the aligned reads.' + - '' : 'The gene level quantification was performed by ${htseqVersion} [3] using --mode=union' + - '' : '--stranded=no and, Ensembl version 75 was used as gene annotation database which is included' + - '' : 'in folder expression/.' + - '' : '' + - 'QC metrics' : '' + - '' : 'Quality control (QC) metrics are calculated for the raw sequencing data. This is done using' + - '' : 'the tool FastQC FastQC [4]. QC metrics are calculated for the aligned reads using' + - '' : 'Picard-tools [5] CollectRnaSeqMetrics, MarkDuplicates, CollectInsertSize-' + - '' : 'Metrics and SAMtools flagstat.These QC metrics form the basis in this final QC report.' + - '' : '' + - 'references' : '' + - '' : '' + - '' : '1. Dobin A, Davis C a, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M,' + - '' : 'Gingeras TR: STAR: ultrafast universal RNA-seq aligner. Bioinformatics 2013, 29:15–21.' + - '' : '2. Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R,' + - '' : 'Subgroup 1000 Genome Project Data Processing: The Sequence Alignment/Map format and SAMtools.' 
+ - '' : 'Bioinforma 2009, 25 (16):2078–2079.' + - '' : '3. Anders S, Pyl PT, Huber W: HTSeq – A Python framework to work with high-throughput sequencing data' + - '' : 'HTSeq – A Python framework to work with high-throughput sequencing data. 2014:0–5.' + - '' : '4. Andrews, S. (2010). FastQC a Quality Control Tool for High Throughput Sequence Data [Online].' + - '' : 'Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ${samtoolsVersion}' + - '' : '5. Picard Sourceforge Web site. http://picard.sourceforge.net/ ${picardVersion}' _EOF From 4b54a5a8cd50227f0431703cbc4b12f4b0bbf78a Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Wed, 11 Jan 2023 13:20:03 +0100 Subject: [PATCH 09/42] remove failing date, and unneeded echo --- protocols/AddOrReplaceReadGroups.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/protocols/AddOrReplaceReadGroups.sh b/protocols/AddOrReplaceReadGroups.sh index bb38a1b..5fac966 100755 --- a/protocols/AddOrReplaceReadGroups.sh +++ b/protocols/AddOrReplaceReadGroups.sh @@ -32,7 +32,6 @@ module load "${picardVersion}" #check modules module list -echo "## $(date) Start $0" java -Xmx6g -XX:ParallelGCThreads=8 -jar "${EBROOTPICARD}/${picardJar}" AddOrReplaceReadGroups \ I="${sortedBam}" \ @@ -43,7 +42,6 @@ RGLB="${externalSampleID}" \ RGPL=ILLUMINA \ RGPU="${sequencer}_${flowcell}_${run}" \ RGSM="${externalSampleID}" \ -RGDT=$(date --rfc-3339=date) \ CREATE_INDEX=true \ MAX_RECORDS_IN_RAM=4000000 \ TMP_DIR="${tempDir}" @@ -52,4 +50,3 @@ echo "returncode: $?"; mv "${tmpAddOrReplaceGroupsBam}" "${addOrReplaceGroupsBam}" mv "${tmpAddOrReplaceGroupsBai}" "${addOrReplaceGroupsBai}" echo "succes moving files"; -echo "## $(date) ## $0 Done " From e7242f4afdd50537b58c9cdfa7c3a06bdcca9e8d Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Wed, 11 Jan 2023 13:20:52 +0100 Subject: [PATCH 10/42] shellcheck fixies --- protocols/BQSR.sh | 13 ++----------- protocols/CombineFastq.sh | 4 ++-- protocols/CopyPrmTmpData.sh | 1 
- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index 3d1ba31..fa0b98c 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -27,23 +27,15 @@ makeTmpDir "${bqsrBai}" tmpBqsrBai=${MC_tmpFile} #Load Modules -module load ${gatkVersion} +module load "${gatkVersion}" #check modules module list -echo "## $(date) Start $0" - -echo -echo echo "Running GATK BQSR:" -java -Dsamjdk.use_async_io_read_samtools=false \ --Dsamjdk.use_async_io_write_samtools=true \ --Dsamjdk.use_async_io_write_tribble=false \ --Dsamjdk.compression_level=2 \ --jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ +java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ "${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" BaseRecalibrator \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ @@ -67,5 +59,4 @@ cd - echo "returncode: $?"; echo "succes moving files"; -echo "## $(date) ## $0 Done " diff --git a/protocols/CombineFastq.sh b/protocols/CombineFastq.sh index 7f52f32..e51eae0 100644 --- a/protocols/CombineFastq.sh +++ b/protocols/CombineFastq.sh @@ -40,12 +40,12 @@ INPUTSRIGHT=() for FqFileLeft in "${trimmedLeftBarcodeFqGz[@]}" do - array_contains INPUTSLEFT "${FqFileLeft}" || INPUTSLEFT+=("${FqFileLeft}") # If bamFile does not exist in array add it + array_contains INPUTSLEFT "${FqFileLeft}" || INPUTSLEFT+=("${FqFileLeft}") done for FqFileRight in "${trimmedRightBarcodeFqGz[@]}" do - array_contains INPUTSRIGHT "${FqFileRight}" || INPUTSRIGHT+=("${FqFileRight}") # If baiFile does not exist in array add it + array_contains INPUTSRIGHT "${FqFileRight}" || INPUTSRIGHT+=("${FqFileRight}") done if [[ "${#INPUTSLEFT[@]}" == 1 ]] diff --git a/protocols/CopyPrmTmpData.sh b/protocols/CopyPrmTmpData.sh index a325c70..d129791 100755 --- a/protocols/CopyPrmTmpData.sh +++ b/protocols/CopyPrmTmpData.sh @@ -21,7 +21,6 @@ #list barcode #list lane -n_elements="${internalSampleID[@]}" max_index="${#internalSampleID[@]}"-1
for ((samplenumber = 0; samplenumber <= max_index; samplenumber++)) From 903621a804101b7186de06fc9ca9ff1d9b61f1bb Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Wed, 11 Jan 2023 15:02:23 +0100 Subject: [PATCH 11/42] shellcheck fixes --- protocols/BQSR.sh | 8 +-- protocols/CombineFastq.sh | 4 +- protocols/CopyPrmTmpData.sh | 9 +-- protocols/CopyToResultsDir.sh | 6 +- protocols/CreateExternSamplesProjects.sh | 16 ++--- protocols/CreateInhouseRnaSeqProjects.sh | 22 +----- protocols/DE.sh | 6 +- protocols/Design.sh | 4 +- protocols/FastQC.sh | 22 +++--- protocols/GatkGenotypeGvcf.sh | 30 ++++----- protocols/GatkHaplotypeCallerGvcf.sh | 9 +-- protocols/HTSeq_count.sh | 2 +- protocols/IndelRealignment.sh | 9 +-- protocols/Leafcutter_intron_clustering.sh | 4 +- protocols/MakeExpressionTable.sh | 4 +- protocols/MarkDuplicates.sh | 8 +-- protocols/MergeBam.sh | 20 +++--- protocols/MultiQCReport.sh | 2 +- protocols/OUTRIDER.sh | 6 +- protocols/QCStats.sh | 67 +++++++++---------- protocols/STAR.sh | 8 +-- protocols/SplitAndTrim.sh | 12 +--- protocols/Stranded.sh | 9 +-- protocols/TIN.sh | 8 +-- protocols/{ => deprecated}/GatkMergeGvcf.sh | 12 ++-- .../{ => deprecated}/filter_Leafcutter.sh | 3 +- protocols/rMATs.sh | 2 +- 27 files changed, 119 insertions(+), 193 deletions(-) rename protocols/{ => deprecated}/GatkMergeGvcf.sh (87%) rename protocols/{ => deprecated}/filter_Leafcutter.sh (93%) diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index fa0b98c..825aa1d 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -52,10 +52,10 @@ java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ mv "${tmpBqsrBam}" "${bqsrBam}" mv "${tmpBqsrBai}" "${bqsrBai}" -cd "${intermediateDir}" -md5sum $(basename "${bqsrBam}")> $(basename "${bqsrBam}").md5 -md5sum $(basename "${bqsrBai}")> $(basename "${bqsrBai}").md5 -cd - +cd "${intermediateDir}" || exit +md5sum "$(basename "${bqsrBam}")" > "$(basename "${bqsrBam}").md5" +md5sum "$(basename "${bqsrBai}")" > 
"$(basename "${bqsrBai}").md5" +cd - || exit echo "returncode: $?"; echo "succes moving files"; diff --git a/protocols/CombineFastq.sh b/protocols/CombineFastq.sh index e51eae0..7817a2c 100644 --- a/protocols/CombineFastq.sh +++ b/protocols/CombineFastq.sh @@ -50,8 +50,8 @@ done if [[ "${#INPUTSLEFT[@]}" == 1 ]] then - ln -sf $(basename "${INPUTSLEFT[0]}") "${mergedLeftBarcodeFqGz}" - ln -sf $(basename "${INPUTSRIGHT[0]}") "${mergedRightBarcodeFqGz}" + ln -sf "$(basename "${INPUTSLEFT[0]}")" "${mergedLeftBarcodeFqGz}" + ln -sf "$(basename "${INPUTSRIGHT[0]}")" "${mergedRightBarcodeFqGz}" echo "nothing to merge because there is only one sample" else cat "${INPUTSLEFT[@]}" > "${tmpMergedLeftBarcodeFqGz}" diff --git a/protocols/CopyPrmTmpData.sh b/protocols/CopyPrmTmpData.sh index d129791..761a4bf 100755 --- a/protocols/CopyPrmTmpData.sh +++ b/protocols/CopyPrmTmpData.sh @@ -84,14 +84,11 @@ do done -cd "${TMPDATADIR}" -if md5sum -c *.md5 > "${RUNNAME}.md5.log" 2>&1 +cd "${TMPDATADIR}" || exit +if md5sum -c ./*.md5 > "${RUNNAME}.md5.log" 2>&1 then echo 'PASS' else echo 'FAILED' fi -cd - - - - +cd - || exit diff --git a/protocols/CopyToResultsDir.sh b/protocols/CopyToResultsDir.sh index b08f186..7fa671e 100755 --- a/protocols/CopyToResultsDir.sh +++ b/protocols/CopyToResultsDir.sh @@ -61,8 +61,6 @@ mkdir -p "${projectResultsDir}/qcmetrics" # Copy BAM plus index plus md5 sum to results directory -usedWorkflow=$(basename ${workflow}) - rsync -avL "${intermediateDir}"/*.sorted.merged.bam "${projectResultsDir}/alignment/" rsync -avL "${intermediateDir}"/*.sorted.merged.bam.{md5sum,bai,bai.md5sum} "${projectResultsDir}/alignment/" @@ -79,7 +77,7 @@ usedWorkflow=$(basename ${workflow}) rsync -av "${intermediateDir}"/*.idxstats "${projectResultsDir}/qcmetrics/" rsync -av "${intermediateDir}"/*.collectrnaseqmetrics "${projectResultsDir}/qcmetrics/" - if [ "${seqType}" == "PE" ] + if [[ "${seqType}" == "PE" ]] then rsync -av "${intermediateDir}"/*.insert_size_metrics 
"${projectResultsDir}/qcmetrics/" else @@ -112,7 +110,7 @@ usedWorkflow=$(basename ${workflow}) # Copy STAR annotated SpliceJunctions rsync -av "${intermediateDir}/"*.SJ.* "${projectResultsDir}/star_sj/" #only available with PE - if [ "${seqType}" == "PE" ] + if [[ "${seqType}" == "PE" ]] then rsync -av "${intermediateDir}"/*.insert_size_* "${projectResultsDir}/qcmetrics/" else diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index 23f18d7..c22799c 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -30,10 +30,9 @@ #list externalFastQ_1 #list externalFastQ_2 -umask 0007 module load "${ngsUtilsVersion}" - module list + # # Create project dirs. # @@ -45,9 +44,9 @@ mkdir -p "${intermediateDir}" mkdir -p "${projectResultsDir}" mkdir -p "${projectQcDir}" -ROCKETPOINT=`pwd` +ROCKETPOINT="${PWD}" -cd "${projectRawtmpDataDir}" +cd "${projectRawtmpDataDir}" || exit # # Create symlinks to the raw data required to analyse this project @@ -56,7 +55,6 @@ cd "${projectRawtmpDataDir}" # -#n_elements="${internalSampleID[@]}" max_index="${#internalSampleID[@]}"-1 for ((samplenumber = 0; samplenumber <= max_index; samplenumber++)) do @@ -87,16 +85,10 @@ do fi done -cd $ROCKETPOINT +cd "${ROCKETPOINT}" || exit -echo "before splitting" -echo `pwd` module load "${ngsVersion}" -# -# TODO: array for each sample: -# - # # Create subset of samples for this project. # diff --git a/protocols/CreateInhouseRnaSeqProjects.sh b/protocols/CreateInhouseRnaSeqProjects.sh index 65abdfd..88dc334 100755 --- a/protocols/CreateInhouseRnaSeqProjects.sh +++ b/protocols/CreateInhouseRnaSeqProjects.sh @@ -32,12 +32,6 @@ #list barcode #list lane -# -# Change permissions. -# -umask 0007 - -#FIX! 
module load "${ngsVersion}" module load "${ngsUtilsVersion}" module list @@ -53,9 +47,9 @@ mkdir -p "${intermediateDir}" mkdir -p "${projectResultsDir}" mkdir -p "${projectQcDir}" -ROCKETPOINT=`pwd` +ROCKETPOINT="${pwd}" -cd "${projectRawtmpDataDir}" +cd "${projectRawtmpDataDir}" || exit # # Create symlinks to the raw data required to analyse this project @@ -63,7 +57,6 @@ cd "${projectRawtmpDataDir}" # For each sequence file (could be multiple per sample): # -n_elements="${internalSampleID[@]}" max_index="${#internalSampleID[@]}-1" for ((samplenumber = 0; samplenumber <= max_index; samplenumber++)) do @@ -97,14 +90,8 @@ do done -cd $ROCKETPOINT +cd "${ROCKETPOINT}" || exit -echo "before splitting" -echo pwd - -# -# TODO: array for each sample: -# # # Create subset of samples for this project. @@ -123,9 +110,6 @@ then rm ../.compute.properties fi -echo "before run second rocket" -echo pwd - sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ -p "${mainParameters}" \ -p "${parameters_build}" \ diff --git a/protocols/DE.sh b/protocols/DE.sh index c4d271f..f0e0923 100755 --- a/protocols/DE.sh +++ b/protocols/DE.sh @@ -37,10 +37,10 @@ do array_contains UNIQUESAMPLES "${sampleId}" || UNIQUESAMPLES+=("${sampleId}") # If sampleId does not exist in array add it done -cd "${intermediateDir}" +cd "${intermediateDir}" || exit #detect number of conditions -col=$(col="condition"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n $col) +col=$(col="condition"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n "${col}") colArray=(${col//:/ }) conditionCount=$(tail -n +2 "${projectJobsDir}/${project}.csv" | cut -d "," -f "${colArray[0]}" | sort | uniq | wc -l) @@ -69,4 +69,4 @@ else echo "running: deseq2 analysis" Rscript "${EBROOTNGS_RNA}/scripts/deseq2_analysis.R" "${projectJobsDir}/${project}.csv" "${project}" fi -cd - +cd - || exit diff --git a/protocols/Design.sh b/protocols/Design.sh index b9536a0..ba33b14 100755 --- 
a/protocols/Design.sh +++ b/protocols/Design.sh @@ -35,7 +35,7 @@ do array_contains UNIQUESAMPLES "${sample}" || UNIQUESAMPLES+=("${sample}") # If bamFile does not exist in array add it done -cd "${intermediateDir}" +cd "${intermediateDir}" || exit #cleanup old file if present rm -f "${intermediateDir}/"*".design.tsv" @@ -66,4 +66,4 @@ done echo "files written to: ${intermediateDir}/ *.design.tsv" -cd - +cd - || exit diff --git a/protocols/FastQC.sh b/protocols/FastQC.sh index a8d8204..509589f 100755 --- a/protocols/FastQC.sh +++ b/protocols/FastQC.sh @@ -21,26 +21,26 @@ #string logsDir #Load module -module load ${fastqcVersion} +module load "${fastqcVersion}" module list -makeTmpDir ${intermediateDir} -tmpIntermediateDir=${MC_tmpFile} +makeTmpDir "${intermediateDir}" +tmpIntermediateDir="${MC_tmpFile}" #If paired-end do fastqc for both ends, else only for one -if [ ${seqType} == "PE" ] +if [[ "${seqType}" == "PE" ]] then # end1 & end2 - fastqc ${peEnd1BarcodeFqGz} \ - ${peEnd2BarcodeFqGz} \ - -o ${tmpIntermediateDir} + fastqc "${peEnd1BarcodeFqGz}" \ + "${peEnd2BarcodeFqGz}" \ + -o "${tmpIntermediateDir}" echo -e "\nFastQC finished succesfull. Moving temp files to final.\n\n" - mv -f ${tmpIntermediateDir}/* ${intermediateDir} + mv -f "${tmpIntermediateDir}/"* "${intermediateDir}" else - fastqc ${srBarcodeFqGz} \ - -o ${tmpIntermediateDir} + fastqc "${srBarcodeFqGz}" \ + -o "${tmpIntermediateDir}" echo -e "\nFastQC finished succesfull. 
Moving temp files to final.\n\n" - mv -f ${tmpIntermediateDir}/* ${intermediateDir} + mv -f "${tmpIntermediateDir}/"* "${intermediateDir}" fi diff --git a/protocols/GatkGenotypeGvcf.sh b/protocols/GatkGenotypeGvcf.sh index e00dbf9..19ad9f7 100755 --- a/protocols/GatkGenotypeGvcf.sh +++ b/protocols/GatkGenotypeGvcf.sh @@ -24,7 +24,7 @@ array_contains () { local seeking="${2}" local in=1 for element in "${!array-}"; do - if [[ "$element" == "$seeking" ]]; then + if [[ "${element}" == "${seeking}" ]]; then in=0 break fi @@ -44,8 +44,6 @@ module load "${htsLibVersion}" #Check modules module list -echo "## $(date) Start $0" - INPUTS=() ALLGVCFs=() @@ -55,21 +53,21 @@ do done SAMPLESIZE=${#INPUTS[@]} -numberofbatches=$(($SAMPLESIZE / 200)) +numberofbatches=$(("${SAMPLESIZE}" / 200)) -for b in $(seq 0 $numberofbatches) +for b in $(seq 0 "${numberofbatches}") do - if [ -f ${gatkHaplotypeCallerGvcf}.$b ] + if [[ -f "${gatkHaplotypeCallerGvcf}.${b}" ]] then - ALLGVCFs+=("--variant gatkHaplotypeCallerGvcf}.$b") + ALLGVCFs+=("--variant ${gatkHaplotypeCallerGvcf}.${b}") fi done -if [ "${SAMPLESIZE}" -gt 200 ] +if [[ "${SAMPLESIZE}" -gt 200 ]] then for b in $(seq 0 "${numberofbatches}") do - if [ -f "${projectBatchCombinedVariantCalls}.${b}" ] + if [[ -f "${projectBatchCombinedVariantCalls}.${b}" ]] then ALLGVCFs+=("--variant=${projectBatchCombinedVariantCalls}.${b}") fi @@ -77,9 +75,9 @@ then else for sampleGvcf in "${gatkHaplotypeCallerGvcf[@]}" do - if [ -f "${sampleGvcf}" ] + if [[ -f "${sampleGvcf}" ]] then - array_contains ALLGVCFs "--variant=${sampleGvcf}" || ALLGVCFs+=("--variant=$sampleGvcf") + array_contains ALLGVCFs "--variant=${sampleGvcf}" || ALLGVCFs+=("--variant=${sampleGvcf}") fi done fi @@ -87,7 +85,7 @@ fi GvcfSize=${#ALLGVCFs[@]} -if [ ${GvcfSize} -ne 0 ] +if [[ ${GvcfSize} -ne 0 ]] then gatk --java-options "-Xmx5g -Djava.io.tmpdir=${tmpTmpDataDir}" CombineGVCFs \ @@ -106,11 +104,11 @@ then tabix -p vcf "${projectBatchGenotypedVariantCalls}" - printf 
"${projectBatchGenotypedVariantCalls} ..done\n" + echo "${projectBatchGenotypedVariantCalls} ..done" - cd "${intermediateDir}" - md5sum $(basename "${projectBatchGenotypedVariantCalls}")> $(basename "${projectBatchGenotypedVariantCalls}").md5 - cd - + cd "${intermediateDir}" || exit + md5sum "$(basename "${projectBatchGenotypedVariantCalls}")" > "$(basename "${projectBatchGenotypedVariantCalls}").md5" + cd - || exit echo "succes moving files" else diff --git a/protocols/GatkHaplotypeCallerGvcf.sh b/protocols/GatkHaplotypeCallerGvcf.sh index 678b07d..2349314 100755 --- a/protocols/GatkHaplotypeCallerGvcf.sh +++ b/protocols/GatkHaplotypeCallerGvcf.sh @@ -23,13 +23,11 @@ makeTmpDir "${gatkHaplotypeCallerGvcftbi}" tmpGatkHaplotypeCallerGvcftbi=${MC_tmpFile} #Load modules -module load ${gatkVersion} +module load "${gatkVersion}" #Check modules module list -echo "## "$(date)" Start $0" - gatk --java-options "-XX:ParallelGCThreads=1 -Djava.io.tmpdir=${tmpTmpDataDir} -Xmx12g" HaplotypeCaller \ -R "${indexFile}" \ -I "${bqsrBam}" \ @@ -39,8 +37,3 @@ gatk --java-options "-XX:ParallelGCThreads=1 -Djava.io.tmpdir=${tmpTmpDataDir} - mv "${tmpGatkHaplotypeCallerGvcf}" "${gatkHaplotypeCallerGvcf}" mv "${tmpGatkHaplotypeCallerGvcftbi}" "${gatkHaplotypeCallerGvcftbi}" - -echo "returncode: $?"; -echo "succes moving files"; - -echo "## "$(date)" ## $0 Done " diff --git a/protocols/HTSeq_count.sh b/protocols/HTSeq_count.sh index 62d2bdf..cd6c0b6 100755 --- a/protocols/HTSeq_count.sh +++ b/protocols/HTSeq_count.sh @@ -34,7 +34,7 @@ then if [[ "${num1}" > 0.6 ]] then STRANDED="yes" - elif [[ "${num2}" > 0.6 ]] + elif [[ "${num2}" > 0.6 ]] then STRANDED="reverse" elif [[ "${num1}" < 0.6 && "${num2}" < 0.6 ]] diff --git a/protocols/IndelRealignment.sh b/protocols/IndelRealignment.sh index 915a90f..246fa3a 100755 --- a/protocols/IndelRealignment.sh +++ b/protocols/IndelRealignment.sh @@ -33,14 +33,9 @@ module load "${gatkVersion}" #check modules module list -echo "## $(date) Start $0" - 
-echo -echo echo "Running GATK IndelRealignment:" - -java -Xmx10g -XX:ParallelGCThreads=8 -Djava.io.tmpdir="${tmpTmpDataDir}" -jar "$EBROOTGATK/GenomeAnalysisTK.jar" \ +java -Xmx10g -XX:ParallelGCThreads=8 -Djava.io.tmpdir="${tmpTmpDataDir}" -jar "${EBROOTGATK}/GenomeAnalysisTK.jar" \ -T IndelRealigner \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ @@ -55,7 +50,5 @@ java -Xmx10g -XX:ParallelGCThreads=8 -Djava.io.tmpdir="${tmpTmpDataDir}" -jar "$ mv "${tmpIndelRealignedBam}" "${IndelRealignedBam}" mv "${tmpIndelRealignedBai}" "${IndelRealignedBai}" -echo "returncode: $?"; echo "succes moving files"; -echo "## $(date) ## $0 Done " diff --git a/protocols/Leafcutter_intron_clustering.sh b/protocols/Leafcutter_intron_clustering.sh index dd21168..bafb37c 100755 --- a/protocols/Leafcutter_intron_clustering.sh +++ b/protocols/Leafcutter_intron_clustering.sh @@ -27,7 +27,7 @@ echo -e "\nWith strandedness type: ${STRANDED}, where (0 = unstranded, 1 = first-strand/RF, 2, = second-strand/FR)." rm -f "${intermediateDir}${project}_juncfiles.txt" -cd "${intermediateDir}" +cd "${intermediateDir}" || exit for bamfile in *."${sampleMergedBamExt}" do @@ -54,4 +54,4 @@ python "${EBROOTLEAFCUTTER}/clustering/leafcutter_cluster_regtools.py" \ -l 500000 \ --nochromcheck=NOCHROMCHECK -cd - +cd - || exit diff --git a/protocols/MakeExpressionTable.sh b/protocols/MakeExpressionTable.sh index 7eb5477..10aeb40 100755 --- a/protocols/MakeExpressionTable.sh +++ b/protocols/MakeExpressionTable.sh @@ -18,7 +18,7 @@ array_contains () { local seeking="${2}" local in=1 for element in "${!array-}"; do - if [[ "$element" == "$seeking" ]]; then + if [[ "${element}" == "${seeking}" ]]; then in=0 break fi @@ -45,7 +45,7 @@ done python "${EBROOTNGS_RNA}/scripts/create_counts_matrix.py" \ -i "${intermediateDir}/fileList.txt" \ -o "${tmpProjectHTseqExpressionTable}" \ --e "$intermediateDir/create_counts_matrix.log" +-e "${intermediateDir}/create_counts_matrix.log" echo "table create succesfull" mv 
"${tmpProjectHTseqExpressionTable}" "${projectHTseqExpressionTable}" diff --git a/protocols/MarkDuplicates.sh b/protocols/MarkDuplicates.sh index b86d02b..8ae64c1 100755 --- a/protocols/MarkDuplicates.sh +++ b/protocols/MarkDuplicates.sh @@ -57,7 +57,7 @@ sambamba markdup \ mv "${tmpSampleMergedDedupBam}" "${sampleMergedDedupBam}" mv "${tmpSampleMergedDedupBai}" "${sampleMergedDedupBai}" -cd "${intermediateDir}" -md5sum "${sampleMergedDedupBam}" > $(basename "${sampleMergedDedupBam}").md5 -md5sum "${sampleMergedDedupBai}" > $(basename "${sampleMergedDedupBai}").md5 -cd - +cd "${intermediateDir}" || exit +md5sum "${sampleMergedDedupBam}" > "$(basename "${sampleMergedDedupBam}").md5" +md5sum "${sampleMergedDedupBai}" > "$(basename "${sampleMergedDedupBai}").md5" +cd - || exit diff --git a/protocols/MergeBam.sh b/protocols/MergeBam.sh index aa5ced4..6c633e3 100755 --- a/protocols/MergeBam.sh +++ b/protocols/MergeBam.sh @@ -58,14 +58,14 @@ done if [[ "${#INPUTS[@]}" == 1 ]] then - ln -sf $(basename "${INPUTBAMS[0]}") "${sampleMergedBam}" - ln -sf $(basename "${UNIQUEBAIS[0]}") "${sampleMergedBai}" + ln -sf "$(basename "${INPUTBAMS[0]}")" "${sampleMergedBam}" + ln -sf "$(basename "${UNIQUEBAIS[0]}")" "${sampleMergedBai}" echo "nothing to merge because there is only one sample" - cd "${intermediateDir}" - md5sum $(basename "${sampleMergedBam}")> $(basename "${sampleMergedBam}").md5sum - md5sum $(basename "${sampleMergedBai}")> $(basename "${sampleMergedBai}").md5sum - cd - + cd "${intermediateDir}" || exit + md5sum "$(basename "${sampleMergedBam}")" > "$(basename "${sampleMergedBam}").md5sum" + md5sum "$(basename "${sampleMergedBai}")" > "$(basename "${sampleMergedBai}").md5sum" + cd - || exit else java -XX:ParallelGCThreads=4 -jar -Xmx6g "${EBROOTPICARD}/${picardJar}" "${mergeSamFilesJar}" \ @@ -82,8 +82,8 @@ else mv "${tmpSampleMergedBam}" "${sampleMergedBam}" mv "${tmpSampleMergedBai}" "${sampleMergedBai}" - cd "${intermediateDir}" - md5sum $(basename 
"${sampleMergedBam}")> $(basename "${sampleMergedBam}").md5sum - md5sum $(basename "${sampleMergedBai}")> $(basename "${sampleMergedBai}").md5sum - cd - + cd "${intermediateDir}" || exit + md5sum "$(basename "${sampleMergedBam}")" > "$(basename "${sampleMergedBam}").md5sum" + md5sum "$(basename "${sampleMergedBai}")" > "$(basename "${sampleMergedBai}").md5sum" + cd - || exit fi diff --git a/protocols/MultiQCReport.sh b/protocols/MultiQCReport.sh index 37fdd48..557a0f8 100755 --- a/protocols/MultiQCReport.sh +++ b/protocols/MultiQCReport.sh @@ -94,4 +94,4 @@ multiqc -c "/intermediateDir/${project}_QC_config.yaml" \ -f "/intermediateDir/" \ -o "/projectResultsDir/" -mv ${projectResultsDir}/multiqc_report.html ${projectResultsDir}/${project}_multiqc_report.html +mv "${projectResultsDir}/multiqc_report.html" "${projectResultsDir}/${project}_multiqc_report.html" diff --git a/protocols/OUTRIDER.sh b/protocols/OUTRIDER.sh index 7bd6fc5..d50cc5c 100755 --- a/protocols/OUTRIDER.sh +++ b/protocols/OUTRIDER.sh @@ -25,7 +25,7 @@ do mkdir -p "${projectResultsDir}/outrider/${sample}/QC" done #run outrider QC part -singularity exec --pwd $PWD --bind "${sifDir}:/sifDir,/apps:/apps,/groups:/groups" \ +singularity exec --pwd "${PWD}" --bind "${sifDir}:/sifDir,/apps:/apps,/groups:/groups" \ "${sifDir}/${outriderVersion}" \ Rscript "${EBROOTNGS_RNA}/scripts/outrider-qc.R" \ "${projectHTseqExpressionTable}" \ @@ -45,7 +45,7 @@ do #get geneOfInterest from samplessheet of provided, #or else get top 3 most significate genes from outrider output. GENE="${geneOfInterest[samplenumber]}" - if [[ ! -z "${GENE}" ]] + if [[ -n "${GENE}" ]] then echo "${GENE}" > "${projectResultsDir}/outrider/${sample}/${sample}.genesOfInterest.tsv" else @@ -55,7 +55,7 @@ do done #Run outrider to genarate plots per sample, and top 3 significant gene. 
-singularity exec --pwd $PWD --bind "${sifDir}:/sifDir,/apps:/apps,/groups:/groups" \ +singularity exec --pwd "${PWD}" --bind "${sifDir}:/sifDir,/apps:/apps,/groups:/groups" \ "${sifDir}/${outriderVersion}" \ Rscript "${EBROOTNGS_RNA}/scripts/outrider.R" \ "${projectResultsDir}/outrider/" diff --git a/protocols/QCStats.sh b/protocols/QCStats.sh index 2360b76..eb87b19 100755 --- a/protocols/QCStats.sh +++ b/protocols/QCStats.sh @@ -40,9 +40,6 @@ module load "${pythonVersion}" module load "${ngsVersion}" module list -makeTmpDir "${intermediateDir}" -tmpIntermediateDir="${MC_tmpFile}" - # Get strandness. STRANDED="$(num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)"; num2="$(tail -n 2 "${strandedness}" | awk '{print $7}' | tail -n 1)"; if (( $(echo "$num1 > 0.6" | bc -l) )); then echo "SECOND_READ_TRANSCRIPTION_STRAND"; fi; if (( $(echo "$num2 > 0.6" | bc -l) )); then echo "FIRST_READ_TRANSCRIPTION_STRAND"; fi; if (( $(echo "$num1 < 0.6 && $num2 < 0.6" | bc -l) )); then echo "NONE"; fi)" @@ -52,15 +49,15 @@ then echo -e "generate CollectMultipleMetrics" # Picard CollectMultipleMetrics - java -jar -Xmx6g -XX:ParallelGCThreads=4 "${EBROOTPICARD}/${picardJar}" CollectMultipleMetrics \ - I="${sampleMergedDedupBam}" \ - O="${collectMultipleMetricsPrefix}" \ - R="${indexSpecies}" \ - PROGRAM=CollectAlignmentSummaryMetrics \ - PROGRAM=QualityScoreDistribution \ - PROGRAM=MeanQualityByCycle \ - PROGRAM=CollectInsertSizeMetrics \ - TMP_DIR="${tempDir}/processing" + java -jar -Xmx6g -XX:ParallelGCThreads=4 "${EBROOTPICARD}/${picardJar}" CollectMultipleMetrics \ + I="${sampleMergedDedupBam}" \ + O="${collectMultipleMetricsPrefix}" \ + R="${indexSpecies}" \ + PROGRAM=CollectAlignmentSummaryMetrics \ + PROGRAM=QualityScoreDistribution \ + PROGRAM=MeanQualityByCycle \ + PROGRAM=CollectInsertSizeMetrics \ + TMP_DIR="${tempDir}/processing" #Flagstat for reads mapping to the genome. 
@@ -85,32 +82,32 @@ then elif [[ "${seqType}" == "SR" ]] then - #Flagstat for reads mapping to the genome. - samtools flagstat "${sampleMergedDedupBam}" > "${flagstatMetrics}" + #Flagstat for reads mapping to the genome. + samtools flagstat "${sampleMergedDedupBam}" > "${flagstatMetrics}" # Fagstats idxstats, reads per chr. - samtools idxstats "${sampleMergedDedupBam}" > "${idxstatsMetrics}" + samtools idxstats "${sampleMergedDedupBam}" > "${idxstatsMetrics}" echo -e "generate CollectMultipleMetrics" - # Picard CollectMultipleMetrics - java -jar -Xmx6g -XX:ParallelGCThreads=4 "${EBROOTPICARD}/${picardJar}" CollectMultipleMetrics \ - I="${sampleMergedDedupBam}" \ - O="${collectMultipleMetricsPrefix}" \ - R="${indexSpecies}" \ - PROGRAM=CollectAlignmentSummaryMetrics \ - PROGRAM=QualityScoreDistribution \ - PROGRAM=MeanQualityByCycle \ - PROGRAM=CollectInsertSizeMetrics \ - TMP_DIR="${tempDir}/processing" - - #CollectRnaSeqMetrics.jar - java -XX:ParallelGCThreads=4 -jar -Xmx6g "${EBROOTPICARD}/${picardJar}" CollectRnaSeqMetrics \ - REF_FLAT="${annotationRefFlat}" \ - I="${sampleMergedDedupBam}" \ - STRAND_SPECIFICITY="${STRANDED}" \ - RIBOSOMAL_INTERVALS="${annotationIntervalList}" \ - VALIDATION_STRINGENCY=LENIENT \ - O="${rnaSeqMetrics}" \ - CHART_OUTPUT="${rnaSeqMetrics}.pdf" + # Picard CollectMultipleMetrics + java -jar -Xmx6g -XX:ParallelGCThreads=4 "${EBROOTPICARD}/${picardJar}" CollectMultipleMetrics \ + I="${sampleMergedDedupBam}" \ + O="${collectMultipleMetricsPrefix}" \ + R="${indexSpecies}" \ + PROGRAM=CollectAlignmentSummaryMetrics \ + PROGRAM=QualityScoreDistribution \ + PROGRAM=MeanQualityByCycle \ + PROGRAM=CollectInsertSizeMetrics \ + TMP_DIR="${tempDir}/processing" + + #CollectRnaSeqMetrics.jar + java -XX:ParallelGCThreads=4 -jar -Xmx6g "${EBROOTPICARD}/${picardJar}" CollectRnaSeqMetrics \ + REF_FLAT="${annotationRefFlat}" \ + I="${sampleMergedDedupBam}" \ + STRAND_SPECIFICITY="${STRANDED}" \ + RIBOSOMAL_INTERVALS="${annotationIntervalList}" \ + 
VALIDATION_STRINGENCY=LENIENT \ + O="${rnaSeqMetrics}" \ + CHART_OUTPUT="${rnaSeqMetrics}.pdf" fi diff --git a/protocols/STAR.sh b/protocols/STAR.sh index b837927..16f7a2b 100755 --- a/protocols/STAR.sh +++ b/protocols/STAR.sh @@ -25,15 +25,12 @@ module list makeTmpDir "${intermediateDir}" tmpintermediateDir=${MC_tmpFile} -echo "## $(date) Start $0" -echo "ID (internalSampleID-lane): ${externalSampleID}" - if [[ "${seqType}" == 'SR' ]] then - echo "seqType = "${seqType}";FastQ: ${mergedSingleBarcodeFqGz}" + echo "seqType = ${seqType}; FastQ: ${mergedSingleBarcodeFqGz}" inputs="--readFilesIn ${mergedSingleBarcodeFqGz}" else - echo "seqType = "${seqType}";FastQs: ${mergedLeftBarcodeFqGz} ${mergedRightBarcodeFqGz}" + echo "seqType = ${seqType}; FastQs: ${mergedLeftBarcodeFqGz} ${mergedRightBarcodeFqGz}" inputs="--readFilesIn ${mergedLeftBarcodeFqGz} ${mergedRightBarcodeFqGz}" fi @@ -62,4 +59,3 @@ echo "STAR for RNA" mv -f "${tmpintermediateDir}/${externalSampleID}."* "${intermediateDir}" echo "succes moving files"; -echo "## $(date) ## $0 Done " diff --git a/protocols/SplitAndTrim.sh b/protocols/SplitAndTrim.sh index c6aa646..93ee94a 100755 --- a/protocols/SplitAndTrim.sh +++ b/protocols/SplitAndTrim.sh @@ -35,10 +35,6 @@ module load "${samtoolsVersion}" #check modules module list -echo "## $(date) Start $0" - -echo "Running split and trim:" - java -Xmx10g -XX:ParallelGCThreads=2 \ -Djava.io.tmpdir="${tmpTmpDataDir}" \ -jar "${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" SplitNCigarReads \ @@ -52,12 +48,8 @@ mv "${tmpsplitAndTrimBai}" "${splitAndTrimBai}" # Create md5sum for zip file -cd "${intermediateDir}" +cd "${intermediateDir}" || exit md5sum "${splitAndTrimShortBam}" > "${splitAndTrimShortBam}.md5" md5sum "${splitAndTrimShortBai}" > "${splitAndTrimShortBai}.md5" -echo "returncode: $?"; -echo "succes moving files"; -cd - - -echo "## $(date) ## $0 Done " +cd - || exit diff --git a/protocols/Stranded.sh b/protocols/Stranded.sh index b5d37a6..a285527 100755 --- 
a/protocols/Stranded.sh +++ b/protocols/Stranded.sh @@ -17,15 +17,8 @@ makeTmpDir "${strandedness}" tmpStrandedness=${MC_tmpFile} -echo "## $(date) Start $0" - -i=$(ls "${intermediateDir}"/*.Aligned.sortedByCoord.out.bam -1 |shuf -n 1) +i=$(find "${intermediateDir}" -name "*.Aligned.sortedByCoord.out.bam" | shuf -n 1) infer_experiment.py -r "${bed12}" -i "${i}" > "${tmpStrandedness}" mv "${tmpStrandedness}" "${strandedness}" - -echo "succes moving files"; -echo "## $(date) ## $0 Done " - - diff --git a/protocols/TIN.sh b/protocols/TIN.sh index eada468..b796d8d 100755 --- a/protocols/TIN.sh +++ b/protocols/TIN.sh @@ -16,13 +16,11 @@ #string tmpName #string logsDir -echo "## $(date) Start $0" - makeTmpDir "${intermediateDir}" tmpintermediateDir=${MC_tmpFile} mkdir -p "${tinDir}" -cd "${tinDir}" +cd "${tinDir}" || exit # Extract the alignment of housekeeping genes. module load "${bedToolsVersion}" @@ -44,7 +42,5 @@ tin.py \ mv "${tmpintermediateDir}/${externalSampleID}"* "${intermediateDir}" -cd - +cd - || exit -echo "succes moving files"; -echo "## $(date) ## $0 Done " diff --git a/protocols/GatkMergeGvcf.sh b/protocols/deprecated/GatkMergeGvcf.sh similarity index 87% rename from protocols/GatkMergeGvcf.sh rename to protocols/deprecated/GatkMergeGvcf.sh index 1623bc3..fa8fc2b 100755 --- a/protocols/GatkMergeGvcf.sh +++ b/protocols/deprecated/GatkMergeGvcf.sh @@ -29,7 +29,7 @@ array_contains () { local seeking="${2}" local in=1 for element in "${!array-}"; do - if [[ "$element" == "$seeking" ]]; then + if [[ "${element}" == "${seeking}" ]]; then in=0 break fi @@ -38,19 +38,17 @@ array_contains () { } #Load modules -module load ${gatkVersion} +module load "${gatkVersion}" #Check modules module list -echo "## $(date) Start $0" - INPUTS=() ALLGVCFs=() for external in "${externalSampleID[@]}" do - array_contains INPUTS "${external}" || INPUTS+=("$external") # If vcfFile does not exist in array add it + array_contains INPUTS "${external}" || INPUTS+=("${external}") # If 
vcfFile does not exist in array add it done SAMPLESIZE="${#INPUTS[@]}" @@ -95,8 +93,8 @@ if [[ "${gvcfSize}" -ne 0 ]] then for i in "${tmpProjectBatchCombinedVariantCalls}".* do - mv "${i}" "${intermediateDir}"/$(basename "${i}") - echo "mv $i ${intermediateDir}/$(basename $i)" + mv "${i}" "${intermediateDir}/$(basename "${i}")" + echo "mv ${i} ${intermediateDir}/$(basename "${i}")" done else echo "nothing to move! There are no samples, maybe there is something going wrong, or maybe chrX or chrY are not in the bed file" diff --git a/protocols/filter_Leafcutter.sh b/protocols/deprecated/filter_Leafcutter.sh similarity index 93% rename from protocols/filter_Leafcutter.sh rename to protocols/deprecated/filter_Leafcutter.sh index f03ab60..2345d1d 100755 --- a/protocols/filter_Leafcutter.sh +++ b/protocols/deprecated/filter_Leafcutter.sh @@ -1,3 +1,4 @@ + #MOLGENIS nodes=1 ppn=1 mem=34gb walltime=05:00:00 #Parameter mapping @@ -18,8 +19,6 @@ module load "${python2Version}" module list - -INPUTFILE="${intermediateDir}${externalSampleID}.leafcutter.outlier_cluster_significance.txt" OUTPUTFILE="${intermediateDir}${externalSampleID}.leafcutter.report.tsv" # adding coordinates to leafcutter results diff --git a/protocols/rMATs.sh b/protocols/rMATs.sh index a287fbc..992cff6 100755 --- a/protocols/rMATs.sh +++ b/protocols/rMATs.sh @@ -29,7 +29,7 @@ rm -r "${rMATsOutputDir}/${externalSampleID}/tmp/" while read -r line do # reading each line - read name status <<< "${line}" + read -r name status <<< "${line}" if [[ "${status}" == "sample" ]] then echo "${name} is a ${status} : in ${externalSampleID}.B1.txt" From cc5b95e23d3533a231b7b1e0a0975a00556d009f Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 12 Jan 2023 10:25:55 +0100 Subject: [PATCH 12/42] added pipefail, fixes for shellcheck --- check/shellcheck.sh | 13 ++-- protocols/AddOrReplaceReadGroups.sh | 2 +- protocols/BQSR.sh | 1 + protocols/CombineFastq.sh | 1 + protocols/CopyPrmTmpData.sh | 1 + 
protocols/CopyToResultsDir.sh | 1 + protocols/CreateExternSamplesProjects.sh | 1 + protocols/CreateInhouseRnaSeqProjects.sh | 1 + protocols/DE.sh | 2 + protocols/Design.sh | 1 + protocols/FastQC.sh | 1 + protocols/GatkGenotypeGvcf.sh | 1 + protocols/GatkHaplotypeCallerGvcf.sh | 1 + protocols/HTSeq_count.sh | 24 ++---- protocols/IndelRealignment.sh | 1 + protocols/Leafcutter.sh | 14 +++- protocols/Leafcutter_filter.sh | 1 + protocols/Leafcutter_intron_clustering.sh | 9 +-- protocols/Leafcutter_rare.sh | 1 + protocols/MakeExpressionTable.sh | 1 + protocols/MarkDuplicates.sh | 1 + protocols/MergeBam.sh | 1 + protocols/MultiQCReport.sh | 94 +++++++++++------------ protocols/OUTRIDER.sh | 1 + protocols/QCStats.sh | 6 +- protocols/RNASeQC.sh | 1 + protocols/RNASeQCPlots.sh | 1 + protocols/STAR.sh | 1 + protocols/SplitAndTrim.sh | 1 + protocols/Stranded.sh | 1 + protocols/TIN.sh | 1 + protocols/TrimReads_TrimGalore.sh | 1 + protocols/VIP.sh | 17 ++-- protocols/annotate_SJ_with_SJDB.sh | 1 + protocols/create_SJDB.sh | 1 + protocols/normalize_SJ.sh | 1 + protocols/rMATs.sh | 8 +- protocols/rMats_reformat.sh | 1 + 38 files changed, 127 insertions(+), 90 deletions(-) diff --git a/check/shellcheck.sh b/check/shellcheck.sh index 6f2d5a8..23bc1d9 100755 --- a/check/shellcheck.sh +++ b/check/shellcheck.sh @@ -7,10 +7,12 @@ # * SC2015: Note that A && B || C is not if-then-else. C may run when A is true. # We know and use this construct regularly to create "transactions" # where C is only executed when both A and B have succeeded. -# * SC2148 Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. Not needed for Molgenis Compute protocols. -# * SC2154 Not deeded, Molgenis Compute takes care of this. 
+# * SC2154: Due to Molgenis Compute string initialization the warning is not valid # -export SHELLCHECK_OPTS="-e SC2004 -e SC2015 -e SC2148 -e SC2154" +# * SC2148: the shebang is declared in the header.ftl not in the protocols +# + +export SHELLCHECK_OPTS="-e SC2004 -e SC2015 -e SC2154 -e SC2148" function showHelp() { # @@ -33,7 +35,7 @@ EOH # # Parse commandline options # -while getopts "hv" opt +while getopts ":hv" opt do case "${opt}" in h) @@ -89,7 +91,8 @@ then # https://github.com/koalaman/shellcheck/wiki/SC${ISSUENUMBER} # explaining what is wrong with the code / style and how to improve it. # - perl -pi -e "s|message='([^']+)'\s+source='ShellCheck.(SC[0-9]+)'|message='<a href="https://github.com/koalaman/shellcheck/wiki/\$2">\$2: \$1</a>' source='ShellCheck.\$2'|" checkstyle-result.xml + perl -pi -e "s|message='([^']+)'\s+source='ShellCheck.(SC[0-9]+)'|message='<a href="https://github.com/koalaman/shellcheck/wiki/\$2">\$2: +\$1</a>' source='ShellCheck.\$2'|" checkstyle-result.xml else # # ShellCheck for regular user on the commandline. 
diff --git a/protocols/AddOrReplaceReadGroups.sh b/protocols/AddOrReplaceReadGroups.sh index 5fac966..d41868e 100755 --- a/protocols/AddOrReplaceReadGroups.sh +++ b/protocols/AddOrReplaceReadGroups.sh @@ -1,4 +1,4 @@ -#!/bin/bash +set -o pipefail #MOLGENIS nodes=1 ppn=8 mem=8gb walltime=05:00:00 #string project diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index 825aa1d..eba2f22 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=8 mem=15gb walltime=23:59:00 #string project diff --git a/protocols/CombineFastq.sh b/protocols/CombineFastq.sh index 7817a2c..9fc3596 100644 --- a/protocols/CombineFastq.sh +++ b/protocols/CombineFastq.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=8gb ppn=6 #Parameter mapping diff --git a/protocols/CopyPrmTmpData.sh b/protocols/CopyPrmTmpData.sh index 761a4bf..d3a4c0a 100755 --- a/protocols/CopyPrmTmpData.sh +++ b/protocols/CopyPrmTmpData.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=01:59:00 mem=4gb #string allRawNgstmpDataDir diff --git a/protocols/CopyToResultsDir.sh b/protocols/CopyToResultsDir.sh index 7fa671e..8ee5487 100755 --- a/protocols/CopyToResultsDir.sh +++ b/protocols/CopyToResultsDir.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 nodes=1 cores=1 mem=4gb #Parameter mapping diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index c22799c..c4644da 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=02:00:00 mem=4gb #list seqType diff --git a/protocols/CreateInhouseRnaSeqProjects.sh b/protocols/CreateInhouseRnaSeqProjects.sh index 88dc334..e3ee4c7 100755 --- a/protocols/CreateInhouseRnaSeqProjects.sh +++ b/protocols/CreateInhouseRnaSeqProjects.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=10-23:59:00 mem=2gb ppn=2 #list seqType diff --git a/protocols/DE.sh 
b/protocols/DE.sh index f0e0923..b30ac6a 100755 --- a/protocols/DE.sh +++ b/protocols/DE.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=4 mem=4gb walltime=05:59:00 #Parameter mapping @@ -41,6 +42,7 @@ cd "${intermediateDir}" || exit #detect number of conditions col=$(col="condition"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n "${col}") +# shellcheck disable=SC2206 colArray=(${col//:/ }) conditionCount=$(tail -n +2 "${projectJobsDir}/${project}.csv" | cut -d "," -f "${colArray[0]}" | sort | uniq | wc -l) diff --git a/protocols/Design.sh b/protocols/Design.sh index ba33b14..041e16f 100755 --- a/protocols/Design.sh +++ b/protocols/Design.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=4 mem=4gb walltime=05:59:00 #Parameter mapping diff --git a/protocols/FastQC.sh b/protocols/FastQC.sh index 509589f..5b519d8 100755 --- a/protocols/FastQC.sh +++ b/protocols/FastQC.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=1gb walltime=05:00:00 diff --git a/protocols/GatkGenotypeGvcf.sh b/protocols/GatkGenotypeGvcf.sh index 19ad9f7..1fc52f4 100755 --- a/protocols/GatkGenotypeGvcf.sh +++ b/protocols/GatkGenotypeGvcf.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=17gb ppn=3 nodes=1 #string gatkVersion diff --git a/protocols/GatkHaplotypeCallerGvcf.sh b/protocols/GatkHaplotypeCallerGvcf.sh index 2349314..e7b724e 100755 --- a/protocols/GatkHaplotypeCallerGvcf.sh +++ b/protocols/GatkHaplotypeCallerGvcf.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=12gb ppn=8 nodes=1 #string gatkVersion diff --git a/protocols/HTSeq_count.sh b/protocols/HTSeq_count.sh index cd6c0b6..b25ccc9 100755 --- a/protocols/HTSeq_count.sh +++ b/protocols/HTSeq_count.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6gb #Parameter mapping @@ -29,23 +30,14 @@ ROWNR=$(wc -l "${strandedness}" | awk '{ print $1 }') if [[ "${ROWNR}" == 6 ]] then - num1=$(tail -n 2 "${strandedness}" | 
awk '{print $7}' | head -n 1) - num2=$(tail -n 2 "${strandedness}" | awk '{print $7}' | tail -n 1) - if [[ "${num1}" > 0.6 ]] - then - STRANDED="yes" - elif [[ "${num2}" > 0.6 ]] - then - STRANDED="reverse" - elif [[ "${num1}" < 0.6 && "${num2}" < 0.6 ]] - then - STRANDED="no" - else - STRANDED="no" - fi + num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" + num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" + + STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "yes"}else if($2 > 0.6){print "reverse"}else if($1 < 0.6 && $2 < 0.6){print "no"} }') + else - echo "strandedness detection failed" - STRANDED='no' + echo "strandedness detection failed, STRANDED='yes'" + STRANDED='yes' fi echo -e "\nQuantifying expression, with strandedness: ${STRANDED}" diff --git a/protocols/IndelRealignment.sh b/protocols/IndelRealignment.sh index 246fa3a..80b5050 100755 --- a/protocols/IndelRealignment.sh +++ b/protocols/IndelRealignment.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=8 mem=11gb walltime=23:59:00 #string project diff --git a/protocols/Leafcutter.sh b/protocols/Leafcutter.sh index ca6c5d6..276c903 100755 --- a/protocols/Leafcutter.sh +++ b/protocols/Leafcutter.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=34gb walltime=05:00:00 #Parameter mapping @@ -22,10 +23,14 @@ module load "${python2Version}" module list # detect strand for RegTools -STRANDED="$(num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)"; num2="$(tail -n 2 "${strandedness}" | awk '{print $7}' | tail -n 1)"; if (( $(echo "$num1 > 0.6" | bc -l) )); then echo "1"; fi; if (( $(echo "$num2 > 0.6" | bc -l) )); then echo "2"; fi; if (( $(echo "$num1 < 0.6 && $num2 < 0.6" | bc -l) )); then echo "0"; fi)" +num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" +num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" + +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "1"}else if($2 > 0.6){print 
"2"}else if($1 < 0.6 && $2 < 0.6){print "0"} }') #detect number of conditions -col=$(col="condition"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n $col) +col=$(col="condition"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n "${col}") +# shellcheck disable=SC2206 colArray=(${col//:/ }) conditionCount=$(tail -n +2 "${projectJobsDir}/${project}.csv" | cut -d "," -f "${colArray[0]}" | sort | uniq | wc -l) @@ -33,9 +38,10 @@ echo -e "\nWith strandedness type: ${STRANDED}, where (0 = unstranded, 1 = first-strand/RF, 2, = second-strand/FR)." echo "create group_list" -col=$(col="externalSampleID"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n $col) +col=$(col="externalSampleID"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n "${col}") +# shellcheck disable=SC2206 colID=(${col//:/ }) -awk -F',' -v id=${colID[0]} -v con=${colArray[0]} '{print $id".sorted.merged.bam\t"$con}' "${projectJobsDir}/${project}.csv" \ +awk -F',' -v id="${colID[0]}" -v con="${colArray[0]}" '{print ${id}".sorted.merged.bam\t"${con}}' "${projectJobsDir}/${project}.csv" \ > "${intermediateDir}${project}_groups_file.txt" sed 1d "${intermediateDir}${project}_groups_file.txt" > "${intermediateDir}${project}"_groups_file.txt.tmp diff --git a/protocols/Leafcutter_filter.sh b/protocols/Leafcutter_filter.sh index b22b072..5adecba 100755 --- a/protocols/Leafcutter_filter.sh +++ b/protocols/Leafcutter_filter.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=34gb walltime=05:00:00 #Parameter mapping diff --git a/protocols/Leafcutter_intron_clustering.sh b/protocols/Leafcutter_intron_clustering.sh index bafb37c..4505c8e 100755 --- a/protocols/Leafcutter_intron_clustering.sh +++ b/protocols/Leafcutter_intron_clustering.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=34gb walltime=05:00:00 #Parameter mapping @@ -16,12 +17,10 @@ module load "${python2Version}" module list # detect strand for RegTools 
-STRANDED="$(num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)"; num2="$(tail -n 2 "${strandedness}" | awk '{print $7}' | tail -n 1)"; if (( $(echo "$num1 > 0.6" | bc -l) )); then echo "1"; fi; if (( $(echo "$num2 > 0.6" | bc -l) )); then echo "2"; fi; if (( $(echo "$num1 < 0.6 && $num2 < 0.6" | bc -l) )); then echo "0"; fi)" +num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" +num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" -#detect number of conditions -col=$(col="condition"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n $col) -colArray=(${col//:/ }) -conditionCount=$(tail -n +2 "${projectJobsDir}/${project}.csv" | cut -d "," -f "${colArray[0]}" | sort | uniq | wc -l) +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "1"}else if($2 > 0.6){print "2"}else if($1 < 0.6 && $2 < 0.6){print "0"} }') echo -e "\nWith strandedness type: ${STRANDED}, where (0 = unstranded, 1 = first-strand/RF, 2, = second-strand/FR)." 
diff --git a/protocols/Leafcutter_rare.sh b/protocols/Leafcutter_rare.sh index 223f36b..4288da1 100755 --- a/protocols/Leafcutter_rare.sh +++ b/protocols/Leafcutter_rare.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=34gb walltime=05:00:00 #Parameter mapping diff --git a/protocols/MakeExpressionTable.sh b/protocols/MakeExpressionTable.sh index 10aeb40..af53be9 100755 --- a/protocols/MakeExpressionTable.sh +++ b/protocols/MakeExpressionTable.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=2gb #string intermediateDir diff --git a/protocols/MarkDuplicates.sh b/protocols/MarkDuplicates.sh index 8ae64c1..ec68e75 100755 --- a/protocols/MarkDuplicates.sh +++ b/protocols/MarkDuplicates.sh @@ -1,3 +1,4 @@ +set -o pipefail #!/bin/bash #MOLGENIS walltime=23:59:00 mem=8gb ppn=6 diff --git a/protocols/MergeBam.sh b/protocols/MergeBam.sh index 6c633e3..fa5f5c5 100755 --- a/protocols/MergeBam.sh +++ b/protocols/MergeBam.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=8gb ppn=6 #Parameter mapping diff --git a/protocols/MultiQCReport.sh b/protocols/MultiQCReport.sh index 557a0f8..08024b4 100755 --- a/protocols/MultiQCReport.sh +++ b/protocols/MultiQCReport.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=5gb walltime=03:00:00 #Parameter mapping @@ -38,55 +39,54 @@ cat > "${intermediateDir}/${project}_QC_config.yaml" <<'_EOF' report_header_info: - - 'Contact E-mail' : '${contact}' - - 'Pipeline Version' : '${ngsVersion}' - - 'Project' : '${project}' - - 'prepKit' : '${prepKit}' - - '' : '' - - 'Used toolversions' : ' ' - - '' : '' - - '' : ${jdkVersion} - - '' : ${fastqcVersion} - - '' : ${starVersion} - - '' : ${samtoolsVersion} - - '' : ${rVersion} - - '' : ${wkhtmltopdfVersion} - - '' : ${picardVersion} - - '' : ${htseqVersion} - - '' : ${pythonVersion} - - '' : ${gatkVersion} - - '' : ${multiqcVersion} - - '' : '' - - 'pipeline description' : '' - - 'Gene expression quantification' : '' - - 
'' : 'The trimmed fastQ files where aligned to build ${indexFileID} reference genome using' - - '' : '${starVersion} [1] allowing for 2 mismatches. Before gene quantification' - - '' : '${samtoolsVersion} [2] was used to sort the aligned reads.' - - '' : 'The gene level quantification was performed by ${htseqVersion} [3] using --mode=union' - - '' : '--stranded=no and, Ensembl version 75 was used as gene annotation database which is included' - - '' : 'in folder expression/.' - - '' : '' - - 'QC metrics' : '' - - '' : 'Quality control (QC) metrics are calculated for the raw sequencing data. This is done using' - - '' : 'the tool FastQC FastQC [4]. QC metrics are calculated for the aligned reads using' - - '' : 'Picard-tools [5] CollectRnaSeqMetrics, MarkDuplicates, CollectInsertSize-' - - '' : 'Metrics and SAMtools flagstat.These QC metrics form the basis in this final QC report.' - - '' : '' - - 'references' : '' - - '' : '' - - '' : '1. Dobin A, Davis C a, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M,' - - '' : 'Gingeras TR: STAR: ultrafast universal RNA-seq aligner. Bioinformatics 2013, 29:15–21.' - - '' : '2. Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R,' - - '' : 'Subgroup 1000 Genome Project Data Processing: The Sequence Alignment/Map format and SAMtools.' - - '' : 'Bioinforma 2009, 25 (16):2078–2079.' - - '' : '3. Anders S, Pyl PT, Huber W: HTSeq – A Python framework to work with high-throughput sequencing data' - - '' : 'HTSeq – A Python framework to work with high-throughput sequencing data. 2014:0–5.' - - '' : '4. Andrews, S. (2010). FastQC a Quality Control Tool for High Throughput Sequence Data [Online].' - - '' : 'Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ${samtoolsVersion}' - - '' : '5. Picard Sourceforge Web site. 
http://picard.sourceforge.net/ ${picardVersion}' +- 'Contact E-mail' : '${contact}' +- 'Pipeline Version' : '${ngsVersion}' +- 'Project' : '${project}' +- 'prepKit' : '${prepKit}' +- '' : '' +- 'Used toolversions' : ' ' +- '' : '' +- '' : ${jdkVersion} +- '' : ${fastqcVersion} +- '' : ${starVersion} +- '' : ${samtoolsVersion} +- '' : ${rVersion} +- '' : ${wkhtmltopdfVersion} +- '' : ${picardVersion} +- '' : ${htseqVersion} +- '' : ${pythonVersion} +- '' : ${gatkVersion} +- '' : ${multiqcVersion} +- '' : '' +- 'pipeline description' : '' +- 'Gene expression quantification' : '' +- '' : 'The trimmed fastQ files where aligned to build ${indexFileID} reference genome using' +- '' : '${starVersion} [1] allowing for 2 mismatches. Before gene quantification' +- '' : '${samtoolsVersion} [2] was used to sort the aligned reads.' +- '' : 'The gene level quantification was performed by ${htseqVersion} [3] using --mode=union' +- '' : '--stranded=no and, Ensembl version 75 was used as gene annotation database which is included' +- '' : 'in folder expression/.' +- '' : '' +- 'QC metrics' : '' +- '' : 'Quality control (QC) metrics are calculated for the raw sequencing data. This is done using' +- '' : 'the tool FastQC FastQC [4]. QC metrics are calculated for the aligned reads using' +- '' : 'Picard-tools [5] CollectRnaSeqMetrics, MarkDuplicates, CollectInsertSize-' +- '' : 'Metrics and SAMtools flagstat.These QC metrics form the basis in this final QC report.' +- '' : '' +- 'references' : '' +- '' : '' +- '' : '1. Dobin A, Davis C a, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M,' +- '' : 'Gingeras TR: STAR: ultrafast universal RNA-seq aligner. Bioinformatics 2013, 29:15–21.' +- '' : '2. Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R,' +- '' : 'Subgroup 1000 Genome Project Data Processing: The Sequence Alignment/Map format and SAMtools.' +- '' : 'Bioinforma 2009, 25 (16):2078–2079.' +- '' : '3. 
Anders S, Pyl PT, Huber W: HTSeq – A Python framework to work with high-throughput sequencing data' +- '' : 'HTSeq – A Python framework to work with high-throughput sequencing data. 2014:0–5.' +- '' : '4. Andrews, S. (2010). FastQC a Quality Control Tool for High Throughput Sequence Data [Online].' +- '' : 'Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ${samtoolsVersion}' +- '' : '5. Picard Sourceforge Web site. http://picard.sourceforge.net/ ${picardVersion}' _EOF - # generate multiqc QC rapport singularity exec --bind "${intermediateDir}:/intermediateDir,${projectResultsDir}:/projectResultsDir" "${sifDir}${multiqcVersion}" \ diff --git a/protocols/OUTRIDER.sh b/protocols/OUTRIDER.sh index d50cc5c..ff6dbfe 100755 --- a/protocols/OUTRIDER.sh +++ b/protocols/OUTRIDER.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=5:00:00 nodes=1 cores=1 mem=50gb #string intermediateDir diff --git a/protocols/QCStats.sh b/protocols/QCStats.sh index eb87b19..0dda1d9 100755 --- a/protocols/QCStats.sh +++ b/protocols/QCStats.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=8gb walltime=05:59:00 #Parameter mapping @@ -41,7 +42,10 @@ module load "${ngsVersion}" module list # Get strandness. 
-STRANDED="$(num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)"; num2="$(tail -n 2 "${strandedness}" | awk '{print $7}' | tail -n 1)"; if (( $(echo "$num1 > 0.6" | bc -l) )); then echo "SECOND_READ_TRANSCRIPTION_STRAND"; fi; if (( $(echo "$num2 > 0.6" | bc -l) )); then echo "FIRST_READ_TRANSCRIPTION_STRAND"; fi; if (( $(echo "$num1 < 0.6 && $num2 < 0.6" | bc -l) )); then echo "NONE"; fi)" +num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" +num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" + +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "SECOND_READ_TRANSCRIPTION_STRAND"}else if($2 > 0.6){print "FIRST_READ_TRANSCRIPTION_STRAND"}else if($1 < 0.6 && $2 < 0.6){print "NONE"} }') #If paired-end do fastqc for both ends, else only for one if [[ "${seqType}" == "PE" ]] diff --git a/protocols/RNASeQC.sh b/protocols/RNASeQC.sh index a59105c..235b21a 100755 --- a/protocols/RNASeQC.sh +++ b/protocols/RNASeQC.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=5:59:00 mem=4gb ppn=1 #Parameter mapping diff --git a/protocols/RNASeQCPlots.sh b/protocols/RNASeQCPlots.sh index fd16306..597db27 100755 --- a/protocols/RNASeQCPlots.sh +++ b/protocols/RNASeQCPlots.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=1 mem=1gb walltime=05:00:00 #Parameter mapping diff --git a/protocols/STAR.sh b/protocols/STAR.sh index 16f7a2b..f098102 100755 --- a/protocols/STAR.sh +++ b/protocols/STAR.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=8 mem=40gb walltime=23:00:00 #Parameter mapping diff --git a/protocols/SplitAndTrim.sh b/protocols/SplitAndTrim.sh index 93ee94a..ce9f759 100755 --- a/protocols/SplitAndTrim.sh +++ b/protocols/SplitAndTrim.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=2 mem=10gb walltime=23:59:00 #string project diff --git a/protocols/Stranded.sh b/protocols/Stranded.sh index a285527..e850e45 100755 --- a/protocols/Stranded.sh +++ b/protocols/Stranded.sh @@ -1,3 +1,4 @@ +set -o 
pipefail #MOLGENIS nodes=1 ppn=1 mem=10gb walltime=05:00:00 #Parameter mapping diff --git a/protocols/TIN.sh b/protocols/TIN.sh index b796d8d..b429729 100755 --- a/protocols/TIN.sh +++ b/protocols/TIN.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=40gb ppn=1 #Parameter mapping diff --git a/protocols/TrimReads_TrimGalore.sh b/protocols/TrimReads_TrimGalore.sh index d06b6b1..92c2e30 100755 --- a/protocols/TrimReads_TrimGalore.sh +++ b/protocols/TrimReads_TrimGalore.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS nodes=1 ppn=4 mem=4gb walltime=23:59:00 #Parameter mapping diff --git a/protocols/VIP.sh b/protocols/VIP.sh index 253c26f..39f4fc1 100755 --- a/protocols/VIP.sh +++ b/protocols/VIP.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=05:59:00 mem=5gb ppn=1 nodes=1 #string tmpTmpDataDir @@ -16,27 +17,23 @@ #string spliceaiIndel #string spliceaiSnv -makeTmpDir "${projectBatchGenotypedVIPPrefix}" -tmpProjectBatchGenotypedVIPPrefix=${MC_tmpFile} - #Load modules module load "${vipVersion}" #Check modules module list - cp "$EBROOTVIP/config/default.cfg" "${intermediateDir}/vip.config" + cp "${EBROOTVIP}/config/default.cfg" "${intermediateDir}/vip.config" echo "annotate_vep_plugin_SpliceAI=${spliceaiSnv},${spliceaiIndel}" >> "${intermediateDir}/vip.config" - echo "## "$(date)" Start $0" - cd "${EBROOTVIP}" + cd "${EBROOTVIP}" || exit bash pipeline.sh \ -c "${intermediateDir}/vip.config" \ -i "${projectBatchGenotypedVariantCalls}" \ -o "${projectBatchGenotypedVIPPrefix}.vcf.gz" - cd - + cd - || exit printf "VIP ..done\n" - cd "${intermediateDir}" - md5sum $(basename "${projectBatchGenotypedVIPPrefix}.vcf.gz")> $(basename "${projectBatchGenotypedVIPPrefix}.vcf.gz").md5 - cd - + cd "${intermediateDir}" || exit + md5sum "$(basename "${projectBatchGenotypedVIPPrefix}.vcf.gz")" > "$(basename "${projectBatchGenotypedVIPPrefix}.vcf.gz").md5" + cd - || exit echo "succes moving files" diff --git a/protocols/annotate_SJ_with_SJDB.sh 
b/protocols/annotate_SJ_with_SJDB.sh index 99b16e9..4734051 100755 --- a/protocols/annotate_SJ_with_SJDB.sh +++ b/protocols/annotate_SJ_with_SJDB.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=5:59:00 mem=4gb ppn=1 #Parameter mapping diff --git a/protocols/create_SJDB.sh b/protocols/create_SJDB.sh index 4a5442f..f9b40b2 100755 --- a/protocols/create_SJDB.sh +++ b/protocols/create_SJDB.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=4gb ppn=4 #Parameter mapping diff --git a/protocols/normalize_SJ.sh b/protocols/normalize_SJ.sh index c15666c..71881df 100755 --- a/protocols/normalize_SJ.sh +++ b/protocols/normalize_SJ.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=4gb ppn=1 #Parameter mapping diff --git a/protocols/rMATs.sh b/protocols/rMATs.sh index 992cff6..0d723b6 100755 --- a/protocols/rMATs.sh +++ b/protocols/rMATs.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=4gb ppn=4 #Parameter mapping @@ -41,8 +42,13 @@ do echo "${status}" done < "${intermediateDir}${externalSampleID}.SJ.design.tsv" + # Get strandness. 
-STRANDED="$(num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)"; num2="$(tail -n 2 "${strandedness}" | awk '{print $7}' | tail -n 1)"; if (( $(echo "$num1 > 0.6" | bc -l) )); then echo "fr-secondstrand"; fi; if (( $(echo "$num2 > 0.6" | bc -l) )); then echo "fr-firststrand"; fi; if (( $(echo "$num1 < 0.6 && $num2 < 0.6" | bc -l) )); then echo "fr-unstranded"; fi)" + +num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" +num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" + +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "fr-secondstrand"}else if($2 > 0.6){print "fr-firststrand"}else if($1 < 0.6 && $2 < 0.6){print "fr-unstranded"} }') singularity exec --bind "${intermediateDir}":/intermediateDir,/apps:/apps,/groups:/groups "${sifDir}/${rMATsVersion}" python /rmats/rmats.py \ --b1 "/intermediateDir/${externalSampleID}.B1.txt" --b2 "/intermediateDir/${externalSampleID}.B2.txt" \ diff --git a/protocols/rMats_reformat.sh b/protocols/rMats_reformat.sh index 460a83f..cffd82f 100755 --- a/protocols/rMats_reformat.sh +++ b/protocols/rMats_reformat.sh @@ -1,3 +1,4 @@ +set -o pipefail #MOLGENIS walltime=23:59:00 mem=4gb ppn=4 #Parameter mapping From e4783525641906a5714824bdcdfd0e82a3cf781e Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 13 Jan 2023 11:48:35 +0000 Subject: [PATCH 13/42] removed unused tools, added gene name to ht_seq output --- parameters.gearshift.csv | 1 + parameters.homo_sapiens.GRCh37.csv | 28 +++++++++++++--------------- parameters.talos.csv | 10 ++++++++++ protocols/HTSeq_count.sh | 1 + protocols/MultiQCReport.sh | 3 --- protocols/QCStats.sh | 1 - 6 files changed, 25 insertions(+), 19 deletions(-) create mode 100755 parameters.talos.csv diff --git a/parameters.gearshift.csv b/parameters.gearshift.csv index 6f4ab1f..a300de9 100755 --- a/parameters.gearshift.csv +++ b/parameters.gearshift.csv @@ -7,3 +7,4 @@ root,/apps/ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm03 
toolchain,foss-2018b +GCCcoreVersion,GCCcore-7.3.0 diff --git a/parameters.homo_sapiens.GRCh37.csv b/parameters.homo_sapiens.GRCh37.csv index b3b4dab..0531e74 100755 --- a/parameters.homo_sapiens.GRCh37.csv +++ b/parameters.homo_sapiens.GRCh37.csv @@ -4,36 +4,34 @@ checkStage,module list jobname,jobname ##### Tools and versions ##### -jdkVersion,Java/8-LTS -ngsUtilsVersion,ngs-utils/19.03.3-GCCcore-7.3.0 +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS -samtoolsVersion,SAMtools/1.9-GCCcore-7.3.0 -bedToolsVersion,BEDTools/2.28.0-GCCcore-7.3.0 +samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} sambambaVersion,sambamba/0.7.0 rVersion,R/3.6.1-${toolchain}-bare rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 picardVersion,picard/2.20.5-Java-11-LTS -htseqVersion,HTSeq/0.11.0-GCCcore-7.3.0-Python-3.7.4 +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 hisatVersion,hisat2/2.1.0-${toolchain} -htsLibVersion,HTSlib/1.11-GCCcore-7.3.0 -pythonVersion,Python/3.7.4-GCCcore-7.3.0-bare -python2Version,Python/2.7.16-GCCcore-7.3.0-bare +htsLibVersion,HTSlib/1.11-${GCCcoreVersion} +pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -wkhtmltopdfVersion,wkhtmltopdf/0.11.0_rc1-static-amd64 -anacondaVersion,Anaconda/1.8.0-Linux-x86_64 starVersion,STAR/2.7.3a-${toolchain} picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles -trimGaloreVersion,TrimGalore/0.4.5-GCCcore-7.3.0-Python-3.7.4-bare -cutadaptVersion,cutadapt/2.6-GCCcore-7.3.0-Python-3.7.4-bare -rSeQCVersion,RSeQC/3.0.1-GCCcore-7.3.0-Python-3.7.4 -leafcutterVersion,leafcutter/aa12b1e-foss-2018b 
-vipVersion,vip/v3.3.1-foss-2018b +trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} outriderVersion,outrider_latest.sif rMATsVersion,rmats_v4.1.2.sif sifDir,/apps/data/GAD/singularity/ diff --git a/parameters.talos.csv b/parameters.talos.csv new file mode 100755 index 0000000..3600453 --- /dev/null +++ b/parameters.talos.csv @@ -0,0 +1,10 @@ +#### ENVIRONMENT VARIABLES #### +queue,atd +workDir,/groups/${groupname}/ +dataDir,${root}/data/ +tmpName,tmp01 +root,/apps/ +tempDir,${workDir}/${tmpName}/tmp/ +permanentDir,${workDir}/prm08 +toolchain,foss-2022a +GCCcoreVersion,GCCcore-11.3.0 diff --git a/protocols/HTSeq_count.sh b/protocols/HTSeq_count.sh index b25ccc9..7e8ad2b 100755 --- a/protocols/HTSeq_count.sh +++ b/protocols/HTSeq_count.sh @@ -48,6 +48,7 @@ samtools \ htseq-count \ -m union \ -s "${STRANDED}" \ + -i gene_name - \ "${annotationGtf}" \ > "${tmpSampleHTseqExpressionText}" diff --git a/protocols/MultiQCReport.sh b/protocols/MultiQCReport.sh index 08024b4..e56718b 100755 --- a/protocols/MultiQCReport.sh +++ b/protocols/MultiQCReport.sh @@ -14,12 +14,10 @@ set -o pipefail #string gcPlotList #string seqType #string rVersion -#string wkhtmltopdfVersion #string fastqcVersion #string samtoolsVersion #string picardVersion #string multiqcVersion -#string anacondaVersion #string starVersion #string indexFileID #string ensembleReleaseVersion @@ -51,7 +49,6 @@ report_header_info: - '' : ${starVersion} - '' : ${samtoolsVersion} - '' : ${rVersion} -- '' : ${wkhtmltopdfVersion} - '' : ${picardVersion} - '' : ${htseqVersion} - '' : ${pythonVersion} diff --git a/protocols/QCStats.sh b/protocols/QCStats.sh index 0dda1d9..14f64b1 100755 --- a/protocols/QCStats.sh +++ b/protocols/QCStats.sh @@ -23,7 +23,6 @@ set -o pipefail #string 
alignmentMetrics #string externalSampleID #string picardVersion -#string anacondaVersion #string samtoolsVersion #string ngsVersion #string pythonVersion From 9972d1f9c9628b8fa0ac75e94a1f959ce0425914 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 13 Jan 2023 11:52:58 +0000 Subject: [PATCH 14/42] . --- parameters.homo_sapiens.GRCh37.solve-rd.csv | 30 ++++++++++----------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/parameters.homo_sapiens.GRCh37.solve-rd.csv b/parameters.homo_sapiens.GRCh37.solve-rd.csv index bfe8680..47ea2dd 100755 --- a/parameters.homo_sapiens.GRCh37.solve-rd.csv +++ b/parameters.homo_sapiens.GRCh37.solve-rd.csv @@ -4,39 +4,37 @@ checkStage,module list jobname,jobname ##### Tools and versions ##### -jdkVersion,Java/8-LTS -ngsUtilsVersion,ngs-utils/19.03.3-GCCcore-7.3.0 +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS -samtoolsVersion,SAMtools/1.9-GCCcore-7.3.0 -bedToolsVersion,BEDTools/2.28.0-GCCcore-7.3.0 +samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} sambambaVersion,sambamba/0.7.0 rVersion,R/3.6.1-${toolchain}-bare rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 picardVersion,picard/2.20.5-Java-11-LTS -htseqVersion,HTSeq/0.11.0-GCCcore-7.3.0-Python-3.7.4 +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 hisatVersion,hisat2/2.1.0-${toolchain} -htsLibVersion,HTSlib/1.11-GCCcore-7.3.0 -pythonVersion,Python/3.7.4-GCCcore-7.3.0-bare -python2Version,Python/2.7.16-GCCcore-7.3.0-bare +htsLibVersion,HTSlib/1.11-${GCCcoreVersion} +pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -wkhtmltopdfVersion,wkhtmltopdf/0.11.0_rc1-static-amd64 
-anacondaVersion,Anaconda/1.8.0-Linux-x86_64 starVersion,STAR/2.7.3a-${toolchain} picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles -trimGaloreVersion,TrimGalore/0.4.5-GCCcore-7.3.0-Python-3.7.4-bare -cutadaptVersion,cutadapt/2.6-GCCcore-7.3.0-Python-3.7.4-bare -rSeQCVersion,RSeQC/3.0.1-GCCcore-7.3.0-Python-3.7.4 -leafcutterVersion,leafcutter/aa12b1e-foss-2018b -vipVersion,vip/v3.3.1-foss-2018b +trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} outriderVersion,outrider_latest.sif rMATsVersion,rmats_v4.1.2.sif -sifDir,/groups/umcg-solve-rd/tmp01/resources/GAD/singularity/ +sifDir,/apps/data/GAD/singularity/ rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif ##### GENERAL DIRECTORIES ##### From 4d3c4d2e3dbfbfc75723eb6e0349ef5c701c94de Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 13 Jan 2023 11:53:23 +0000 Subject: [PATCH 15/42] new parameters --- parameters.homo_sapiens.hg19.solve-rd.csv | 30 +++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/parameters.homo_sapiens.hg19.solve-rd.csv b/parameters.homo_sapiens.hg19.solve-rd.csv index d7b4b65..160036a 100755 --- a/parameters.homo_sapiens.hg19.solve-rd.csv +++ b/parameters.homo_sapiens.hg19.solve-rd.csv @@ -4,39 +4,37 @@ checkStage,module list jobname,jobname ##### Tools and versions ##### -jdkVersion,Java/8-LTS -ngsUtilsVersion,ngs-utils/19.03.3-GCCcore-7.3.0 +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS -samtoolsVersion,SAMtools/1.9-GCCcore-7.3.0 -bedToolsVersion,BEDTools/2.28.0-GCCcore-7.3.0 +samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} 
sambambaVersion,sambamba/0.7.0 rVersion,R/3.6.1-${toolchain}-bare rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 picardVersion,picard/2.20.5-Java-11-LTS -htseqVersion,HTSeq/0.11.0-GCCcore-7.3.0-Python-3.7.4 +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 hisatVersion,hisat2/2.1.0-${toolchain} -htsLibVersion,HTSlib/1.11-GCCcore-7.3.0 -pythonVersion,Python/3.7.4-GCCcore-7.3.0-bare -python2Version,Python/2.7.16-GCCcore-7.3.0-bare +htsLibVersion,HTSlib/1.11-${GCCcoreVersion} +pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -wkhtmltopdfVersion,wkhtmltopdf/0.11.0_rc1-static-amd64 -anacondaVersion,Anaconda/1.8.0-Linux-x86_64 starVersion,STAR/2.7.3a-${toolchain} picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles -trimGaloreVersion,TrimGalore/0.4.5-GCCcore-7.3.0-Python-3.7.4-bare -cutadaptVersion,cutadapt/2.6-GCCcore-7.3.0-Python-3.7.4-bare -rSeQCVersion,RSeQC/3.0.1-GCCcore-7.3.0-Python-3.7.4 -leafcutterVersion,leafcutter/aa12b1e-foss-2018b -vipVersion,vip/v3.3.1-foss-2018b +trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} outriderVersion,outrider_latest.sif rMATsVersion,rmats_v4.1.2.sif -sifDir,/groups/umcg-solve-rd/tmp01/resources/GAD/singularity/ +sifDir,/apps/data/GAD/singularity/ rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif ##### GENERAL DIRECTORIES ##### From 0a2cfb802fcb51097de911f9247ab94cf08cb589 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 12 Jan 2023 13:03:04 +0100 Subject: [PATCH 16/42] bugfixes --- protocols/BQSR.sh | 2 +- 
protocols/Leafcutter.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index eba2f22..4c1cbce 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -36,7 +36,7 @@ module list echo "Running GATK BQSR:" -jan -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ +java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ "${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" BaseRecalibrator \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ diff --git a/protocols/Leafcutter.sh b/protocols/Leafcutter.sh index 276c903..b672fb1 100755 --- a/protocols/Leafcutter.sh +++ b/protocols/Leafcutter.sh @@ -41,7 +41,7 @@ echo "create group_list" col=$(col="externalSampleID"; head -n1 "${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n "${col}") # shellcheck disable=SC2206 colID=(${col//:/ }) -awk -F',' -v id="${colID[0]}" -v con="${colArray[0]}" '{print ${id}".sorted.merged.bam\t"${con}}' "${projectJobsDir}/${project}.csv" \ +awk -F',' -v id="${colID[0]}" -v con="${colArray[0]}" '{print ${id}".sorted.merged.bam\t"$con}' "${projectJobsDir}/${project}.csv" \ > "${intermediateDir}${project}_groups_file.txt" sed 1d "${intermediateDir}${project}_groups_file.txt" > "${intermediateDir}${project}"_groups_file.txt.tmp From 7df5f78e4a307e086db0940701b0489f7c8d4c79 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 13 Jan 2023 12:34:24 +0000 Subject: [PATCH 17/42] removed unused tool --- protocols/CopyToResultsDir.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/protocols/CopyToResultsDir.sh b/protocols/CopyToResultsDir.sh index 8ee5487..7b50f4a 100755 --- a/protocols/CopyToResultsDir.sh +++ b/protocols/CopyToResultsDir.sh @@ -19,7 +19,6 @@ set -o pipefail #string fastqcVersion #string samtoolsVersion #string rVersion -#string wkhtmltopdfVersion #string picardVersion #string starVersion #string htseqVersion @@ -38,10 +37,6 @@ set -o pipefail #string tmpName #string logsDir -# 
Change permissions - -umask 0007 - # Make result directories mkdir -p "${projectResultsDir}/alignment" mkdir -p "${projectResultsDir}/fastqc" From 594abf897d91b20b26103c26a1f8f1ab82f2c3e7 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 13 Jan 2023 15:59:53 +0000 Subject: [PATCH 18/42] fix for getting the correct gene_name out of a GTF --- scripts/annotate_SJ_with_genes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/annotate_SJ_with_genes.py b/scripts/annotate_SJ_with_genes.py index 407864e..58c2cef 100755 --- a/scripts/annotate_SJ_with_genes.py +++ b/scripts/annotate_SJ_with_genes.py @@ -38,9 +38,12 @@ continue line = line.strip() tabs = line.split('\t') + arr_genetotal = line.split('"') gene = line.split('"')[1] chrm, start, end = tabs[0], int(tabs[3]), int(tabs[4]) - gene = line.split('"')[1] + # get col number of 'gene_name' in arr_genetotal, add 1 to it \ + # to get the actual 'gene_name value' from the next array value. + gene = line.split('"')[arr_genetotal.index("; gene_name ")+1] if gene not in genes_dict: genes_dict[gene] = [chrm, start, end] elif chrm == genes_dict[gene][0]: From b0df409c0cf921a74a6e253782db0af17e8315ad Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Tue, 17 Jan 2023 12:24:16 +0000 Subject: [PATCH 19/42] bugs --- protocols/HTSeq_count.sh | 2 +- protocols/Leafcutter.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/protocols/HTSeq_count.sh b/protocols/HTSeq_count.sh index 7e8ad2b..04e0478 100755 --- a/protocols/HTSeq_count.sh +++ b/protocols/HTSeq_count.sh @@ -48,7 +48,7 @@ samtools \ htseq-count \ -m union \ -s "${STRANDED}" \ - -i gene_name + -i gene_name \ - \ "${annotationGtf}" \ > "${tmpSampleHTseqExpressionText}" diff --git a/protocols/Leafcutter.sh b/protocols/Leafcutter.sh index b672fb1..f94158f 100755 --- a/protocols/Leafcutter.sh +++ b/protocols/Leafcutter.sh @@ -41,7 +41,7 @@ echo "create group_list" col=$(col="externalSampleID"; head -n1 
"${projectJobsDir}/${project}.csv" | tr "," "\n" | grep -n "${col}") # shellcheck disable=SC2206 colID=(${col//:/ }) -awk -F',' -v id="${colID[0]}" -v con="${colArray[0]}" '{print ${id}".sorted.merged.bam\t"$con}' "${projectJobsDir}/${project}.csv" \ +awk -F',' -v id="${colID[0]}" -v con="${colArray[0]}" '{print $id".sorted.merged.bam\t"$con}' "${projectJobsDir}/${project}.csv" \ > "${intermediateDir}${project}_groups_file.txt" sed 1d "${intermediateDir}${project}_groups_file.txt" > "${intermediateDir}${project}"_groups_file.txt.tmp From 218d3e85f3d468a9eaf0e9852df94f5053bf478a Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 20 Jan 2023 12:13:14 +0000 Subject: [PATCH 20/42] added hc parameters, and moved toolVersions to host params --- parameters.gearshift.csv | 34 ++++++++++++++++ parameters.homo_sapiens.GRCh37.csv | 36 +---------------- parameters.homo_sapiens.GRCh37.solve-rd.csv | 35 +--------------- parameters.hyperchicken.csv | 44 +++++++++++++++++++++ 4 files changed, 80 insertions(+), 69 deletions(-) create mode 100755 parameters.hyperchicken.csv diff --git a/parameters.gearshift.csv b/parameters.gearshift.csv index a300de9..abf8467 100755 --- a/parameters.gearshift.csv +++ b/parameters.gearshift.csv @@ -8,3 +8,37 @@ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm03 toolchain,foss-2018b GCCcoreVersion,GCCcore-7.3.0 + +##### Tools and versions ##### +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsRNAVersion,NGS_RNA/beta +fastqcVersion,FastQC/0.11.8-Java-11-LTS +samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} +sambambaVersion,sambamba/0.7.0 +rVersion,R/3.6.1-${toolchain}-bare +rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 +picardVersion,picard/2.20.5-Java-11-LTS +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 +hisatVersion,hisat2/2.1.0-${toolchain} +htsLibVersion,HTSlib/1.11-${GCCcoreVersion} 
+pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare +python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 +pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 +gatkVersion,GATK/4.1.4.1-Java-8-LTS +multiqcVersion,multiqc_v1.12.sif +starVersion,STAR/2.7.3a-${toolchain} +picardJar,picard.jar +gatkJar,gatk-package-4.1.4.1-local.jar +mergeSamFilesJar,MergeSamFiles +trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} +outriderVersion,outrider_latest.sif +rMATsVersion,rmats_v4.1.2.sif +sifDir,/apps/data/GAD/singularity/ +rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif diff --git a/parameters.homo_sapiens.GRCh37.csv b/parameters.homo_sapiens.GRCh37.csv index 0531e74..07ad53b 100755 --- a/parameters.homo_sapiens.GRCh37.csv +++ b/parameters.homo_sapiens.GRCh37.csv @@ -3,40 +3,6 @@ stage,module load checkStage,module list jobname,jobname -##### Tools and versions ##### -jdkVersion,Java/11-LTS -ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} -ngsRNAVersion,NGS_RNA/beta -fastqcVersion,FastQC/0.11.8-Java-11-LTS -samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} -bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} -sambambaVersion,sambamba/0.7.0 -rVersion,R/3.6.1-${toolchain}-bare -rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 -picardVersion,picard/2.20.5-Java-11-LTS -htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 -hisatVersion,hisat2/2.1.0-${toolchain} -htsLibVersion,HTSlib/1.11-${GCCcoreVersion} -pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare -python2Version,Python/2.7.16-${GCCcoreVersion}-bare -python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 -pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 -gatkVersion,GATK/4.1.4.1-Java-8-LTS -multiqcVersion,multiqc_v1.12.sif 
-starVersion,STAR/2.7.3a-${toolchain} -picardJar,picard.jar -gatkJar,gatk-package-4.1.4.1-local.jar -mergeSamFilesJar,MergeSamFiles -trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare -cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare -rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 -leafcutterVersion,leafcutter/aa12b1e-${toolchain} -vipVersion,vip/v3.3.1-${toolchain} -outriderVersion,outrider_latest.sif -rMATsVersion,rmats_v4.1.2.sif -sifDir,/apps/data/GAD/singularity/ -rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif - ##### GENERAL DIRECTORIES ##### tmpDataDir,${workDir}/${tmpName}/ tmpTmpDataDir,${tmpDataDir}/tmp/ @@ -164,7 +130,7 @@ gtexJunc,/apps/data/GAD/others/GTEx_v7_bflb_junctions.gct omimList,/apps/data/GAD/others/OMIM2.list indexFile,${indexSpecies} dbsnpVcf,/apps/data/dbSNP/dbsnp_137.b37.vcf -starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.3a-foss-2018b/ +starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.3a-${GCCcoreVersion}/ #### GENOME VARIABLES #### genome,GRCh37 ensembleReleaseVersion,75 diff --git a/parameters.homo_sapiens.GRCh37.solve-rd.csv b/parameters.homo_sapiens.GRCh37.solve-rd.csv index 47ea2dd..1b403ad 100755 --- a/parameters.homo_sapiens.GRCh37.solve-rd.csv +++ b/parameters.homo_sapiens.GRCh37.solve-rd.csv @@ -3,40 +3,6 @@ stage,module load checkStage,module list jobname,jobname -##### Tools and versions ##### -jdkVersion,Java/11-LTS -ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} -ngsRNAVersion,NGS_RNA/beta -fastqcVersion,FastQC/0.11.8-Java-11-LTS -samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} -bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} -sambambaVersion,sambamba/0.7.0 -rVersion,R/3.6.1-${toolchain}-bare -rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 -picardVersion,picard/2.20.5-Java-11-LTS -htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 -hisatVersion,hisat2/2.1.0-${toolchain} 
-htsLibVersion,HTSlib/1.11-${GCCcoreVersion} -pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare -python2Version,Python/2.7.16-${GCCcoreVersion}-bare -python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 -pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 -gatkVersion,GATK/4.1.4.1-Java-8-LTS -multiqcVersion,multiqc_v1.12.sif -starVersion,STAR/2.7.3a-${toolchain} -picardJar,picard.jar -gatkJar,gatk-package-4.1.4.1-local.jar -mergeSamFilesJar,MergeSamFiles -trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare -cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare -rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 -leafcutterVersion,leafcutter/aa12b1e-${toolchain} -vipVersion,vip/v3.3.1-${toolchain} -outriderVersion,outrider_latest.sif -rMATsVersion,rmats_v4.1.2.sif -sifDir,/apps/data/GAD/singularity/ -rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif - ##### GENERAL DIRECTORIES ##### tmpDataDir,/groups/umcg-solve-rd/tmp01/rna/ tmpTmpDataDir,/groups/umcg-solve-rd/tmp01/rna/tmp/ @@ -186,3 +152,4 @@ annotationFile,${dataDir}Ensembl/GrCh37.75/pub/release-75/gtf/${speciesFileName} ensembleDir,${dataDir}/ftp.broadinstitute.org/bundle/2.8/${genome}/gtf/${speciesFileName}/ spliceaiSnv,/apps/data/SpliceAI/GRCh37/spliceai_scores.raw.snv.vcf.gz spliceaiIndel,/apps/data/SpliceAI/GRCh37/spliceai_scores.raw.indel.vcf.gz +leafcutterAllExon,/apps/data/GAD/others/gencode_GRCh37_all_exons.txt.gz diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv new file mode 100755 index 0000000..0c0ab32 --- /dev/null +++ b/parameters.hyperchicken.csv @@ -0,0 +1,44 @@ +#### ENVIRONMENT VARIABLES #### +queue,atd +workDir,/groups/${groupname}/ +dataDir,${root}/data/ +tmpName,tmp01 +root,/apps/ +tempDir,${workDir}/${tmpName}/tmp/ +permanentDir,${workDir}/prm03 +toolchain,foss-2022a +GCCcoreVersion,GCCcore-11.3.0 + +##### Tools and versions ##### +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} 
+ngsRNAVersion,NGS_RNA/beta +fastqcVersion,FastQC/0.11.8-Java-11-LTS +samtoolsVersion,SAMtools/1.16.1-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.30.0-${GCCcoreVersion} +sambambaVersion,sambamba/0.7.0 +rVersion,R/4.2.1-${toolchain}-bare +rPlusVersion,RPlus/4.2.1-${toolchain}-v21.10.1 +picardVersion,picard/2.26.10-Java-8-LTS +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.10.4 +hisatVersion,hisat2/2.1.0-${toolchain} +htsLibVersion,HTSlib/1.16-${GCCcoreVersion} +pythonVersion,Python/3.10.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare +python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 +pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 +gatkVersion,GATK/4.1.4.1-Java-8-LTS +multiqcVersion,multiqc_v1.12.sif +starVersion,STAR/2.7.3a-${toolchain} +picardJar,picard.jar +gatkJar,gatk-package-4.1.4.1-local.jar +mergeSamFilesJar,MergeSamFiles +trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.10.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.10.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.10.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} +outriderVersion,outrider_latest.sif +rMATsVersion,rmats_v4.1.2.sif +sifDir,/apps/data/GAD/singularity/ +rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif From 655b31b43d573b8d4da1dbf8747989f88ece80b5 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Wed, 1 Feb 2023 09:20:58 +0000 Subject: [PATCH 21/42] versions update --- parameters.hyperchicken.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 0c0ab32..9e47809 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -34,7 +34,7 @@ picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.10.4-bare 
-cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.10.4-bare +cutadaptVersion,cutadapt/4.2-${GCCcoreVersion}-Python-3.10.4-bare rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.10.4 leafcutterVersion,leafcutter/aa12b1e-${toolchain} vipVersion,vip/v3.3.1-${toolchain} From d2336abb04dbc71c07d7b35837371ec96a8e0fa6 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 2 Feb 2023 07:51:35 +0000 Subject: [PATCH 22/42] fix hostparams --- parameters.fender.csv | 35 +++++++++++++++++++++++++++++++++++ parameters.gearshift.csv | 2 +- parameters.talos.csv | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/parameters.fender.csv b/parameters.fender.csv index c4e5c48..5c32e60 100755 --- a/parameters.fender.csv +++ b/parameters.fender.csv @@ -7,3 +7,38 @@ root,/apps/ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm10 toolchain,foss-2018b +GCCcoreVersion,GCCcore-11.3.0 + +##### Tools and versions ##### +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsRNAVersion,NGS_RNA/beta +fastqcVersion,FastQC/0.11.8-Java-11-LTS +samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} +sambambaVersion,sambamba/0.7.0 +rVersion,R/3.6.1-${toolchain}-bare +rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 +picardVersion,picard/2.20.5-Java-11-LTS +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 +hisatVersion,hisat2/2.1.0-${toolchain} +htsLibVersion,HTSlib/1.11-${GCCcoreVersion} +pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare +python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 +pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 +gatkVersion,GATK/4.1.4.1-Java-8-LTS +multiqcVersion,multiqc_v1.12.sif +starVersion,STAR/2.7.3a-${toolchain} +picardJar,picard.jar +gatkJar,gatk-package-4.1.4.1-local.jar +mergeSamFilesJar,MergeSamFiles 
+trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} +outriderVersion,outrider_latest.sif +rMATsVersion,rmats_v4.1.2.sif +sifDir,/apps/data/GAD/singularity/ +rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif diff --git a/parameters.gearshift.csv b/parameters.gearshift.csv index abf8467..a39eb5e 100755 --- a/parameters.gearshift.csv +++ b/parameters.gearshift.csv @@ -7,7 +7,7 @@ root,/apps/ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm03 toolchain,foss-2018b -GCCcoreVersion,GCCcore-7.3.0 +GCCcoreVersion,GCCcore-11.3.0 ##### Tools and versions ##### jdkVersion,Java/11-LTS diff --git a/parameters.talos.csv b/parameters.talos.csv index 3600453..2014da4 100755 --- a/parameters.talos.csv +++ b/parameters.talos.csv @@ -8,3 +8,37 @@ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm08 toolchain,foss-2022a GCCcoreVersion,GCCcore-11.3.0 + +##### Tools and versions ##### +jdkVersion,Java/11-LTS +ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsRNAVersion,NGS_RNA/beta +fastqcVersion,FastQC/0.11.8-Java-11-LTS +samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} +bedToolsVersion,BEDTools/2.28.0-${GCCcoreVersion} +sambambaVersion,sambamba/0.7.0 +rVersion,R/3.6.1-${toolchain}-bare +rPlusVersion,RPlus/3.6.1-${toolchain}-v21.10.1 +picardVersion,picard/2.20.5-Java-11-LTS +htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.7.4 +hisatVersion,hisat2/2.1.0-${toolchain} +htsLibVersion,HTSlib/1.11-${GCCcoreVersion} +pythonVersion,Python/3.7.4-${GCCcoreVersion}-bare +python2Version,Python/2.7.16-${GCCcoreVersion}-bare +python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 +pythonPlusVersion,PythonPlus/3.7.4-${toolchain}-v20.02.1 +gatkVersion,GATK/4.1.4.1-Java-8-LTS +multiqcVersion,multiqc_v1.12.sif +starVersion,STAR/2.7.3a-${toolchain} 
+picardJar,picard.jar +gatkJar,gatk-package-4.1.4.1-local.jar +mergeSamFilesJar,MergeSamFiles +trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.7.4-bare +cutadaptVersion,cutadapt/2.6-${GCCcoreVersion}-Python-3.7.4-bare +rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.7.4 +leafcutterVersion,leafcutter/aa12b1e-${toolchain} +vipVersion,vip/v3.3.1-${toolchain} +outriderVersion,outrider_latest.sif +rMATsVersion,rmats_v4.1.2.sif +sifDir,/apps/data/GAD/singularity/ +rnaseqcVersion,${sifDir}/rnaseqc_2.4.2.sif From adf16e56d51b58f4f3271e867b30a2c2fc48ca3b Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 2 Feb 2023 08:48:58 +0000 Subject: [PATCH 23/42] version update --- parameters.fender.csv | 2 +- parameters.gearshift.csv | 2 +- parameters.hyperchicken.csv | 2 +- parameters.talos.csv | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/parameters.fender.csv b/parameters.fender.csv index 5c32e60..a284f54 100755 --- a/parameters.fender.csv +++ b/parameters.fender.csv @@ -11,7 +11,7 @@ GCCcoreVersion,GCCcore-11.3.0 ##### Tools and versions ##### jdkVersion,Java/11-LTS -ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsUtilsVersion,ngs-utils/22.10.1 ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} diff --git a/parameters.gearshift.csv b/parameters.gearshift.csv index a39eb5e..388a220 100755 --- a/parameters.gearshift.csv +++ b/parameters.gearshift.csv @@ -11,7 +11,7 @@ GCCcoreVersion,GCCcore-11.3.0 ##### Tools and versions ##### jdkVersion,Java/11-LTS -ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsUtilsVersion,ngs-utils/22.10.1 ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 9e47809..5987a2c 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -11,7 +11,7 @@ GCCcoreVersion,GCCcore-11.3.0 
##### Tools and versions ##### jdkVersion,Java/11-LTS -ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsUtilsVersion,ngs-utils/22.10.1 ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS samtoolsVersion,SAMtools/1.16.1-${GCCcoreVersion} diff --git a/parameters.talos.csv b/parameters.talos.csv index 2014da4..e581302 100755 --- a/parameters.talos.csv +++ b/parameters.talos.csv @@ -11,7 +11,7 @@ GCCcoreVersion,GCCcore-11.3.0 ##### Tools and versions ##### jdkVersion,Java/11-LTS -ngsUtilsVersion,ngs-utils/19.03.3-${GCCcoreVersion} +ngsUtilsVersion,ngs-utils/22.10.1 ngsRNAVersion,NGS_RNA/beta fastqcVersion,FastQC/0.11.8-Java-11-LTS samtoolsVersion,SAMtools/1.9-${GCCcoreVersion} From 4ea51890f3e4412dfdba2b4beaef4d03b30dc897 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Thu, 2 Feb 2023 09:03:36 +0000 Subject: [PATCH 24/42] remove deprecated script --- protocols/CreateExternSamplesProjects.sh | 6 +----- protocols/CreateInhouseRnaSeqProjects.sh | 7 +------ 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index c4644da..a7a09b3 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -90,11 +90,7 @@ cd "${ROCKETPOINT}" || exit module load "${ngsVersion}" -# -# Create subset of samples for this project. -# - -extract_samples_from_GAF_list.pl --i "${worksheet}" --o "${projectJobsDir}/${project}.csv" --c project --q "${project}" +cp "${worksheet}" "${projectJobsDir}/${project}.csv" # # Execute MOLGENIS/compute to create job scripts to analyse this project. diff --git a/protocols/CreateInhouseRnaSeqProjects.sh b/protocols/CreateInhouseRnaSeqProjects.sh index e3ee4c7..6734323 100755 --- a/protocols/CreateInhouseRnaSeqProjects.sh +++ b/protocols/CreateInhouseRnaSeqProjects.sh @@ -94,12 +94,7 @@ done cd "${ROCKETPOINT}" || exit -# -# Create subset of samples for this project. 
-# - - -extract_samples_from_GAF_list.pl --i "${worksheet}" --o "${projectJobsDir}/${project}.csv" --c project --q "${project}" +cp "${worksheet}" "${projectJobsDir}/${project}.csv" # # Execute MOLGENIS/compute to create job scripts to analyse this project. From e8e3b8b5b4fe7dcb1958b11c40247787d1fbcaca Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Fri, 17 Feb 2023 08:20:59 +0000 Subject: [PATCH 25/42] . --- parameters.hyperchicken.csv | 2 +- test/test_pipeline.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 5987a2c..60db264 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -2,7 +2,7 @@ queue,atd workDir,/groups/${groupname}/ dataDir,${root}/data/ -tmpName,tmp01 +tmpName,tmp09 root,/apps/ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm03 diff --git a/test/test_pipeline.sh b/test/test_pipeline.sh index 982e9c2..8838d2a 100644 --- a/test/test_pipeline.sh +++ b/test/test_pipeline.sh @@ -81,7 +81,7 @@ function checkIfFinished(){ echo "${_projectName} test succeeded!" 
echo "" } -tmpdirectory="tmp01" +tmpdirectory="tmp09" groupName="umcg-atd" NGS_RNA_VERSION="NGS_DNA/betaAutotest" @@ -119,7 +119,7 @@ tail -1 workflow_STAR.csv | perl -p -e 's|,|\t|g' | awk '{print "s15_Autotest,te perl -pi -e 's|s09_OUTRIDER|#s09_OUTRIDER|g' test_workflow_STAR.csv perl -pi -e 's|s12_VIP|#s12_VIP|g' test_workflow_STAR.csv -cp "${pipelinefolder}/test/results/"* "/groups/umcg-atd/tmp01/tmp/NGS_RNA/testdata_true/" +cp "${pipelinefolder}/test/results/"* "${workfolder}/tmp/NGS_RNA/testdata_true/" preparePipeline From ac6079aa921b66aeecac72408952a8468a149c52 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 17 Feb 2023 08:48:15 +0000 Subject: [PATCH 26/42] trimgalore version --- parameters.hyperchicken.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 60db264..37845c8 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -33,7 +33,7 @@ starVersion,STAR/2.7.3a-${toolchain} picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles -trimGaloreVersion,TrimGalore/0.4.5-${GCCcoreVersion}-Python-3.10.4-bare +trimGaloreVersion,TrimGalore/0.6.7-${GCCcoreVersion} cutadaptVersion,cutadapt/4.2-${GCCcoreVersion}-Python-3.10.4-bare rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.10.4 leafcutterVersion,leafcutter/aa12b1e-${toolchain} From ca212c90a85836b8fd07fa63496f07cbd1bca771 Mon Sep 17 00:00:00 2001 From: gerbenvandervries Date: Fri, 17 Feb 2023 09:10:41 +0000 Subject: [PATCH 27/42] STAR version --- parameters.hyperchicken.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 37845c8..b97ecc7 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -29,7 +29,7 @@ python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 gatkVersion,GATK/4.1.4.1-Java-8-LTS 
multiqcVersion,multiqc_v1.12.sif -starVersion,STAR/2.7.3a-${toolchain} +starVersion,STAR/STAR/2.7.9a-${toolchain} picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles From 68dcfec129b102f655acb155ff26660bc1844ace Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Fri, 17 Feb 2023 09:15:45 +0000 Subject: [PATCH 28/42] . --- parameters.hyperchicken.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index b97ecc7..42900f0 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -29,7 +29,7 @@ python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -starVersion,STAR/STAR/2.7.9a-${toolchain} +starVersion,STAR/2.7.9a-${GCCcoreVersion} picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles From 3c1088aa2c1a63c3df873a6762c58719915860c0 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Wed, 1 Mar 2023 14:48:11 +0000 Subject: [PATCH 29/42] STAR ref change --- parameters.homo_sapiens.GRCh37.csv | 2 +- parameters.hyperchicken.csv | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/parameters.homo_sapiens.GRCh37.csv b/parameters.homo_sapiens.GRCh37.csv index 07ad53b..b6ac19d 100755 --- a/parameters.homo_sapiens.GRCh37.csv +++ b/parameters.homo_sapiens.GRCh37.csv @@ -130,7 +130,7 @@ gtexJunc,/apps/data/GAD/others/GTEx_v7_bflb_junctions.gct omimList,/apps/data/GAD/others/OMIM2.list indexFile,${indexSpecies} dbsnpVcf,/apps/data/dbSNP/dbsnp_137.b37.vcf -starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.3a-${GCCcoreVersion}/ +starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.9a-${GCCVersion}/ #### GENOME VARIABLES #### 
genome,GRCh37 ensembleReleaseVersion,75 diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 42900f0..6d1d6f8 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -8,6 +8,7 @@ tempDir,${workDir}/${tmpName}/tmp/ permanentDir,${workDir}/prm03 toolchain,foss-2022a GCCcoreVersion,GCCcore-11.3.0 +GCCVersion,GCC-11.3.0 ##### Tools and versions ##### jdkVersion,Java/11-LTS @@ -29,7 +30,7 @@ python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -starVersion,STAR/2.7.9a-${GCCcoreVersion} +starVersion,STAR/2.7.9a-GCC-11.3.0 picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles From 797d5f935bc4702a1180dd6c074600fbbc2aabc5 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 2 Mar 2023 09:48:58 +0000 Subject: [PATCH 30/42] version --- parameters.homo_sapiens.GRCh37.csv | 2 +- parameters.hyperchicken.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parameters.homo_sapiens.GRCh37.csv b/parameters.homo_sapiens.GRCh37.csv index b6ac19d..c86ac6c 100755 --- a/parameters.homo_sapiens.GRCh37.csv +++ b/parameters.homo_sapiens.GRCh37.csv @@ -130,7 +130,7 @@ gtexJunc,/apps/data/GAD/others/GTEx_v7_bflb_junctions.gct omimList,/apps/data/GAD/others/OMIM2.list indexFile,${indexSpecies} dbsnpVcf,/apps/data/dbSNP/dbsnp_137.b37.vcf -starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.9a-${GCCVersion}/ +starIndex,/apps/data/Ensembl/GrCh37.75/pub/release-75/fasta/homo_sapiens/STAR-2.7.3a-foss-2018b/ #### GENOME VARIABLES #### genome,GRCh37 ensembleReleaseVersion,75 diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 6d1d6f8..8c56e0a 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -30,7 +30,7 @@ 
python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -starVersion,STAR/2.7.9a-GCC-11.3.0 +starVersion,2.7.3a-GCC-11.3.0 picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles From e7e18fe39bf72138694820364222fd1caf63e75c Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 2 Mar 2023 10:28:51 +0000 Subject: [PATCH 31/42] version fix, and bugfix strandedness --- parameters.hyperchicken.csv | 2 +- protocols/QCStats.sh | 2 +- protocols/rMATs.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 8c56e0a..22fefbe 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -30,7 +30,7 @@ python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif -starVersion,2.7.3a-GCC-11.3.0 +starVersion,STAR/2.7.3a-GCC-11.3.0 picardJar,picard.jar gatkJar,gatk-package-4.1.4.1-local.jar mergeSamFilesJar,MergeSamFiles diff --git a/protocols/QCStats.sh b/protocols/QCStats.sh index 14f64b1..a9833dd 100755 --- a/protocols/QCStats.sh +++ b/protocols/QCStats.sh @@ -44,7 +44,7 @@ module list num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" -STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "SECOND_READ_TRANSCRIPTION_STRAND"}else if($2 > 0.6){print "FIRST_READ_TRANSCRIPTION_STRAND"}else if($1 < 0.6 && $2 < 0.6){print "NONE"} }') +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "FIRST_READ_TRANSCRIPTION_STRAND"}else if($2 > 0.6){print "SECOND_READ_TRANSCRIPTION_STRAND"}else if($1 < 0.6 && $2 < 0.6){print "NONE"} }') #If paired-end do fastqc for both ends, 
else only for one if [[ "${seqType}" == "PE" ]] diff --git a/protocols/rMATs.sh b/protocols/rMATs.sh index 0d723b6..ceffd13 100755 --- a/protocols/rMATs.sh +++ b/protocols/rMATs.sh @@ -48,7 +48,7 @@ done < "${intermediateDir}${externalSampleID}.SJ.design.tsv" num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" -STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "fr-secondstrand"}else if($2 > 0.6){print "fr-firststrand"}else if($1 < 0.6 && $2 < 0.6){print "fr-unstranded"} }') +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "fr-firststrand"}else if($2 > 0.6){print "fr-secondstrand"}else if($1 < 0.6 && $2 < 0.6){print "fr-unstranded"} }') singularity exec --bind "${intermediateDir}":/intermediateDir,/apps:/apps,/groups:/groups "${sifDir}/${rMATsVersion}" python /rmats/rmats.py \ --b1 "/intermediateDir/${externalSampleID}.B1.txt" --b2 "/intermediateDir/${externalSampleID}.B2.txt" \ From 2bad75547d346f831a29a3bf2e85ce17bc32cd16 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 2 Mar 2023 15:03:03 +0000 Subject: [PATCH 32/42] versions --- parameters.hyperchicken.csv | 2 +- protocols/Design.sh | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index 22fefbe..d4de494 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -28,7 +28,7 @@ pythonVersion,Python/3.10.4-${GCCcoreVersion}-bare python2Version,Python/2.7.16-${GCCcoreVersion}-bare python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 -gatkVersion,GATK/4.1.4.1-Java-8-LTS +gatkVersion,GATK/4.2.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif starVersion,STAR/2.7.3a-GCC-11.3.0 picardJar,picard.jar diff --git a/protocols/Design.sh b/protocols/Design.sh index 041e16f..3255706 100755 --- a/protocols/Design.sh 
+++ b/protocols/Design.sh @@ -1,5 +1,5 @@ set -o pipefail -#MOLGENIS nodes=1 ppn=4 mem=4gb walltime=05:59:00 +#MOLGENIS nodes=1 ppn=1 mem=4gb walltime=05:59:00 #Parameter mapping #string rPlusVersion @@ -10,9 +10,6 @@ set -o pipefail #string tmpName #string logsDir -module load "${rPlusVersion}" -module list - #Function to check if array contains value array_contains () { local array="$1[@]" From b25ca38499c45ec9a2c73334e812eb43a2053c9c Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 9 Mar 2023 12:47:09 +0000 Subject: [PATCH 33/42] leafcutter --- parameters.hyperchicken.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index d4de494..f4046fa 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -19,7 +19,7 @@ samtoolsVersion,SAMtools/1.16.1-${GCCcoreVersion} bedToolsVersion,BEDTools/2.30.0-${GCCcoreVersion} sambambaVersion,sambamba/0.7.0 rVersion,R/4.2.1-${toolchain}-bare -rPlusVersion,RPlus/4.2.1-${toolchain}-v21.10.1 +rPlusVersion,RPlus/4.2.1-${toolchain}-v23.01.1 picardVersion,picard/2.26.10-Java-8-LTS htseqVersion,HTSeq/0.11.0-${GCCcoreVersion}-Python-3.10.4 hisatVersion,hisat2/2.1.0-${toolchain} @@ -37,7 +37,7 @@ mergeSamFilesJar,MergeSamFiles trimGaloreVersion,TrimGalore/0.6.7-${GCCcoreVersion} cutadaptVersion,cutadapt/4.2-${GCCcoreVersion}-Python-3.10.4-bare rSeQCVersion,RSeQC/3.0.1-${GCCcoreVersion}-Python-3.10.4 -leafcutterVersion,leafcutter/aa12b1e-${toolchain} +leafcutterVersion,leafcutter/aa12b1e-${GCCcoreVersion} vipVersion,vip/v3.3.1-${toolchain} outriderVersion,outrider_latest.sif rMATsVersion,rmats_v4.1.2.sif From e5b7dc56f6a18afafaae639dfe06fd55597bf0f7 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Fri, 10 Mar 2023 15:12:23 +0000 Subject: [PATCH 34/42] fixes --- parameters.hyperchicken.csv | 2 +- protocols/BQSR.sh | 4 ++-- protocols/Leafcutter.sh | 2 ++ 
protocols/Leafcutter_intron_clustering.sh | 4 ++-- protocols/Leafcutter_rare.sh | 10 ++-------- protocols/SplitAndTrim.sh | 2 +- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index f4046fa..dde8459 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -26,7 +26,7 @@ hisatVersion,hisat2/2.1.0-${toolchain} htsLibVersion,HTSlib/1.16-${GCCcoreVersion} pythonVersion,Python/3.10.4-${GCCcoreVersion}-bare python2Version,Python/2.7.16-${GCCcoreVersion}-bare -python2PlusVersion,PythonPlus/2.7.16-${toolchain}-v20.12.1 +python2PlusVersion,PythonPlus/2.7.16-foss-2018b-v21.08.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 gatkVersion,GATK/4.2.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index 4c1cbce..4c4a763 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -37,14 +37,14 @@ echo "Running GATK BQSR:" java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ -"${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" BaseRecalibrator \ +"${EBROOTGATK}/gatk-package-4.2.4.1-local.jar" BaseRecalibrator \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ -O "${bqsrBeforeGrp}" \ --known-sites "${dbsnpVcf}" java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ -"${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" ApplyBQSR \ +"${EBROOTGATK}/gatk-package-4.2.4.1-local.jar" ApplyBQSR \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ -O "${tmpBqsrBam}" \ diff --git a/protocols/Leafcutter.sh b/protocols/Leafcutter.sh index f94158f..21c08d3 100755 --- a/protocols/Leafcutter.sh +++ b/protocols/Leafcutter.sh @@ -47,6 +47,8 @@ awk -F',' -v id="${colID[0]}" -v con="${colArray[0]}" '{print $id".sorted.merged sed 1d "${intermediateDir}${project}_groups_file.txt" > "${intermediateDir}${project}"_groups_file.txt.tmp mv "${intermediateDir}${project}_groups_file.txt.tmp" 
"${intermediateDir}${project}_groups_file.txt" +export R_LIBS_USER="${EBROOTLEAFCUTTER}/R_LIBS/" + echo "conditionCount = ${conditionCount}" if [[ "${conditionCount}" -gt 1 ]] then diff --git a/protocols/Leafcutter_intron_clustering.sh b/protocols/Leafcutter_intron_clustering.sh index 4505c8e..8c3944b 100755 --- a/protocols/Leafcutter_intron_clustering.sh +++ b/protocols/Leafcutter_intron_clustering.sh @@ -20,7 +20,7 @@ module list num1="$(tail -n 2 "${strandedness}" | awk '{print $7}' | head -n 1)" num2="$(tail -n 1 "${strandedness}" | awk '{print $7}')" -STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "1"}else if($2 > 0.6){print "2"}else if($1 < 0.6 && $2 < 0.6){print "0"} }') +STRANDED=$(echo -e "${num1}\t${num2}" | awk '{if ($1 > 0.6){print "RF"}else if($2 > 0.6){print "FR"}else if($1 < 0.6 && $2 < 0.6){print "XS"} }') echo -e "\nWith strandedness type: ${STRANDED}, where (0 = unstranded, 1 = first-strand/RF, 2, = second-strand/FR)." @@ -38,7 +38,7 @@ do -a 8 \ -m 50 \ -M 500000 \ - -s 0 \ + -s "${STRANDED}" \ "${bamfile}" \ -o "${bamfile}.junc" diff --git a/protocols/Leafcutter_rare.sh b/protocols/Leafcutter_rare.sh index 4288da1..08dabdc 100755 --- a/protocols/Leafcutter_rare.sh +++ b/protocols/Leafcutter_rare.sh @@ -23,6 +23,8 @@ module load "${leafcutterVersion}" module load "${python2Version}" module list +export R_LIBS_USER="${EBROOTLEAFCUTTER}/R_LIBS/" + "${EBROOTLEAFCUTTER}/scripts/leafcutter_ds.R" \ -e "${annotationTxt}" \ --num_threads 4 \ @@ -33,12 +35,4 @@ module list "${intermediateDir}/${project}_leafcutter_cluster_regtools_perind_numers.counts.gz" \ "${intermediateDir}/${externalSampleID}.SJ.design.tsv" - Rscript "${EBROOTLEAFCUTTER}/scripts/ds_plots.R" \ - -e "${gencodeHg19AllExons}" \ - -o "${tmpintermediateDir}/${externalSampleID}_leafcutter_ds" \ - "${intermediateDir}/${project}_leafcutter_cluster_regtools_perind_numers.counts.gz" \ - "${intermediateDir}/${externalSampleID}.SJ.design.tsv" \ - 
"${tmpintermediateDir}/${externalSampleID}.leafcutter.outlier_cluster_significance.txt" \ - -f 0.05 - mv "${tmpintermediateDir}/${externalSampleID}"* "${intermediateDir}" diff --git a/protocols/SplitAndTrim.sh b/protocols/SplitAndTrim.sh index ce9f759..73df6bd 100755 --- a/protocols/SplitAndTrim.sh +++ b/protocols/SplitAndTrim.sh @@ -38,7 +38,7 @@ module list java -Xmx10g -XX:ParallelGCThreads=2 \ -Djava.io.tmpdir="${tmpTmpDataDir}" \ --jar "${EBROOTGATK}/gatk-package-4.1.4.1-local.jar" SplitNCigarReads \ +-jar "${EBROOTGATK}/gatk-package-4.2.4.1-local.jar" SplitNCigarReads \ --tmp-dir "${tmpTmpDataDir}" \ -R "${indexFile}" \ -I "${sampleMergedDedupBam}" \ From 395ba654b17fe5d8b179fe7a9382d3a92052b38b Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Tue, 14 Mar 2023 09:12:34 +0000 Subject: [PATCH 35/42] version --- parameters.hyperchicken.csv | 2 +- protocols/BQSR.sh | 3 ++- protocols/SplitAndTrim.sh | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/parameters.hyperchicken.csv b/parameters.hyperchicken.csv index dde8459..6152cf0 100755 --- a/parameters.hyperchicken.csv +++ b/parameters.hyperchicken.csv @@ -28,7 +28,7 @@ pythonVersion,Python/3.10.4-${GCCcoreVersion}-bare python2Version,Python/2.7.16-${GCCcoreVersion}-bare python2PlusVersion,PythonPlus/2.7.16-foss-2018b-v21.08.1 pythonPlusVersion,PythonPlus/3.10.4-${toolchain}-v22.11.3 -gatkVersion,GATK/4.2.4.1-Java-8-LTS +gatkVersion,GATK/4.1.4.1-Java-8-LTS multiqcVersion,multiqc_v1.12.sif starVersion,STAR/2.7.3a-GCC-11.3.0 picardJar,picard.jar diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index 4c4a763..79c8823 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -5,6 +5,7 @@ set -o pipefail #string stage #string checkStage #string gatkVersion +#string gatkJar #string intermediateDir #string externalSampleID #string bqsrBam @@ -37,7 +38,7 @@ echo "Running GATK BQSR:" java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ 
-"${EBROOTGATK}/gatk-package-4.2.4.1-local.jar" BaseRecalibrator \ +"${EBROOTGATK}/${gatkJar}" BaseRecalibrator \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ -O "${bqsrBeforeGrp}" \ diff --git a/protocols/SplitAndTrim.sh b/protocols/SplitAndTrim.sh index 73df6bd..aec4b20 100755 --- a/protocols/SplitAndTrim.sh +++ b/protocols/SplitAndTrim.sh @@ -38,7 +38,7 @@ module list java -Xmx10g -XX:ParallelGCThreads=2 \ -Djava.io.tmpdir="${tmpTmpDataDir}" \ --jar "${EBROOTGATK}/gatk-package-4.2.4.1-local.jar" SplitNCigarReads \ +-jar "${EBROOTGATK}/${gatkJar}" SplitNCigarReads \ --tmp-dir "${tmpTmpDataDir}" \ -R "${indexFile}" \ -I "${sampleMergedDedupBam}" \ From a54de8a82b7ee6e2d7c05547add9a0ff937ebb79 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Wed, 15 Mar 2023 12:22:20 +0000 Subject: [PATCH 36/42] trigger, atotest --- protocols/BQSR.sh | 2 +- protocols/QCStats.sh | 2 ++ test/protocols/Autotest.sh | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/protocols/BQSR.sh b/protocols/BQSR.sh index 79c8823..19fc826 100755 --- a/protocols/BQSR.sh +++ b/protocols/BQSR.sh @@ -45,7 +45,7 @@ java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ --known-sites "${dbsnpVcf}" java -jar -Xmx7g -XX:ParallelGCThreads=2 -Djava.io.tmpdir="${tmpTmpDataDir}" \ -"${EBROOTGATK}/gatk-package-4.2.4.1-local.jar" ApplyBQSR \ +"${EBROOTGATK}/${gatkJar}" ApplyBQSR \ -R "${indexFile}" \ -I "${splitAndTrimBam}" \ -O "${tmpBqsrBam}" \ diff --git a/protocols/QCStats.sh b/protocols/QCStats.sh index a9833dd..0aad218 100755 --- a/protocols/QCStats.sh +++ b/protocols/QCStats.sh @@ -25,6 +25,7 @@ set -o pipefail #string picardVersion #string samtoolsVersion #string ngsVersion +#string rPlusVersion #string pythonVersion #string picardJar #string project @@ -38,6 +39,7 @@ module load "${picardVersion}" module load "${samtoolsVersion}" module load "${pythonVersion}" module load "${ngsVersion}" +module load "${rPlusVersion}" 
module list # Get strandness. diff --git a/test/protocols/Autotest.sh b/test/protocols/Autotest.sh index d112f82..02ad6c0 100755 --- a/test/protocols/Autotest.sh +++ b/test/protocols/Autotest.sh @@ -9,7 +9,7 @@ #tmp fix export TERM='xterm' -testResults="/groups/umcg-atd/tmp01/tmp/NGS_RNA/testdata_true/" +testResults="/groups/umcg-atd/${tmpName}/tmp/NGS_RNA/testdata_true/" mkdir -p "${testResults}/output_NGS_RNA" module load ngs-utils From d8340a7f0e06cde45798f397aacbedb17075c026 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 16 Mar 2023 09:45:40 +0000 Subject: [PATCH 37/42] trigger --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6744355..1f4e268 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

NGS_RNA pipeline

+

NGS_RNA pipeline

Description of the different steps used in the RNA analysis pipeline

From 1066d770444711783a1c1001046b8430ffd1d5d0 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 16 Mar 2023 10:18:54 +0000 Subject: [PATCH 38/42] update core, and testtime --- protocols/CombineFastq.sh | 2 +- protocols/DE.sh | 2 +- protocols/MarkDuplicates.sh | 2 +- protocols/TrimReads_TrimGalore.sh | 2 +- test/test_pipeline.sh | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/protocols/CombineFastq.sh b/protocols/CombineFastq.sh index 9fc3596..5df758b 100644 --- a/protocols/CombineFastq.sh +++ b/protocols/CombineFastq.sh @@ -1,5 +1,5 @@ set -o pipefail -#MOLGENIS walltime=23:59:00 mem=8gb ppn=6 +#MOLGENIS walltime=23:59:00 mem=8gb ppn=1 #Parameter mapping #string tempDir diff --git a/protocols/DE.sh b/protocols/DE.sh index b30ac6a..669b89a 100755 --- a/protocols/DE.sh +++ b/protocols/DE.sh @@ -1,5 +1,5 @@ set -o pipefail -#MOLGENIS nodes=1 ppn=4 mem=4gb walltime=05:59:00 +#MOLGENIS nodes=1 ppn=1 mem=4gb walltime=05:59:00 #Parameter mapping #string rPlusVersion diff --git a/protocols/MarkDuplicates.sh b/protocols/MarkDuplicates.sh index ec68e75..cdce494 100755 --- a/protocols/MarkDuplicates.sh +++ b/protocols/MarkDuplicates.sh @@ -1,6 +1,6 @@ set -o pipefail #!/bin/bash -#MOLGENIS walltime=23:59:00 mem=8gb ppn=6 +#MOLGENIS walltime=23:59:00 mem=8gb ppn=4 #Parameter mapping #string picardVersion diff --git a/protocols/TrimReads_TrimGalore.sh b/protocols/TrimReads_TrimGalore.sh index 92c2e30..4715f60 100755 --- a/protocols/TrimReads_TrimGalore.sh +++ b/protocols/TrimReads_TrimGalore.sh @@ -1,5 +1,5 @@ set -o pipefail -#MOLGENIS nodes=1 ppn=4 mem=4gb walltime=23:59:00 +#MOLGENIS nodes=1 ppn=1 mem=4gb walltime=23:59:00 #Parameter mapping #string seqType diff --git a/test/test_pipeline.sh b/test/test_pipeline.sh index 8838d2a..9428dad 100644 --- a/test/test_pipeline.sh +++ b/test/test_pipeline.sh @@ -63,9 +63,9 @@ function checkIfFinished(){ minutes=$((minutes+2)) count=$((count+2)) - if [ $count -eq 30 ] 
+ if [ $count -eq 35 ] then - echo "the test was not finished within 30 minutes, let's kill it" + echo "the test was not finished within 35 minutes, let's kill it" echo -e "\n" for i in ${workfolder}/projects/${_projectName}/run01/jobs/*.sh do From 3d8f61eaa0b903b1a89f81bdbbb82e1af555a764 Mon Sep 17 00:00:00 2001 From: "Gerben van der Vries\" gerbenvandervries@gmail.com" Date: Thu, 16 Mar 2023 11:35:49 +0000 Subject: [PATCH 39/42] . --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f4e268..6744355 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

NGS_RNA pipeline

+

NGS_RNA pipeline

Description of the different steps used in the RNA analysis pipeline

From e6e5566b743de9b074ce725ec3dd0f657ffebb36 Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Mon, 19 Jun 2023 21:49:35 +0200 Subject: [PATCH 40/42] Updated shellcheck test script: synced with updates previously already applied to other repos. --- check/shellcheck.sh | 49 +++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/check/shellcheck.sh b/check/shellcheck.sh index 23bc1d9..7f02914 100755 --- a/check/shellcheck.sh +++ b/check/shellcheck.sh @@ -1,5 +1,9 @@ #!/bin/bash +set -e +set -u +set -o pipefail + # # Disable some shellcheck warnings: # * SC2004: $/${} is unnecessary on arithmetic variables. @@ -7,12 +11,9 @@ # * SC2015: Note that A && B || C is not if-then-else. C may run when A is true. # We know and use this construct regularly to create "transactions" # where C is only executed when both A and B have succeeded. -# * SC2154: Due to Molgenis Compute string initialization the warning is not valid -# -# * SC2148: the shebang is declared in the header.ftl not in the protocols +# * SC2148: The shebang is declared in the header.ftl and not in the protocols. # - -export SHELLCHECK_OPTS="-e SC2004 -e SC2015 -e SC2154 -e SC2148" +export SHELLCHECK_OPTS="-e SC2004 -e SC2015 -e SC2148" function showHelp() { # @@ -35,6 +36,7 @@ EOH # # Parse commandline options # +declare format='gcc' # default while getopts ":hv" opt do case "${opt}" in @@ -42,7 +44,7 @@ do showHelp ;; v) - verbose='1' + format='tty' ;; \?) printf '%s\n' "FATAL: Invalid option -${OPTARG}. Try $(basename "${0}") -h for help." @@ -68,14 +70,13 @@ which shellcheck 2>&1 >/dev/null \ exit 1 } +MYDIR="$(cd -P "$(dirname "${0}")" && pwd)" # -# Run ShellCheck for all Bash scripts in the bin/ subdir. -# * Includes sourced files, so the libraries from the lib/ subfolder +# Run ShellCheck for all Bash scripts in the protocols/ subdir. 
+# * Includes sourced files, so files in a subfolder # are checked too as long a they are used in at least one script. -# * Select format and output based on whether this script is -# executed by Jenkins or by a regular user. # -if [[ -n "${WORKSPACE:-}" ]] +if [[ "${CIRCLECI:-}" == true ]] then # # Exclude SC2154 (warning for variables that are referenced but not assigned), @@ -83,27 +84,9 @@ then # export SHELLCHECK_OPTS="${SHELLCHECK_OPTS} -e SC2154" # - # ShellCheck for Jenkins. - # - shellcheck -a -x -o all -f checkstyle "${WORKSPACE}"/protocols/*.sh | tee checkstyle-result.xml - # - # Reformat the generated report to add hyperlinks to the ShellCheck issues on the wiki: - # https://github.com/koalaman/shellcheck/wiki/SC${ISSUENUMBER} - # explaining what is wrong with the code / style and how to improve it. - # - perl -pi -e "s|message='([^']+)'\s+source='ShellCheck.(SC[0-9]+)'|message='<a href="https://github.com/koalaman/shellcheck/wiki/\$2">\$2: -\$1</a>' source='ShellCheck.\$2'|" checkstyle-result.xml -else - # - # ShellCheck for regular user on the commandline. + # Exclude SC2312 (warning for masking return values of command in a subshell when using process substitution) temporarily, + # because we did not find a reliable fix yet.... # - MYDIR="$(cd -P "$(dirname "${0}")" && pwd)" - if [[ "${verbose:-0}" -eq 1 ]] - then - cd "${MYDIR}/.." - shellcheck -a -x -o all -f tty protocols/*.sh # cannot use the printf construct used below for non-vebose output as it destroys the terminal colors. - cd '-' # Goes back to previous directory before we changed to ${MYDIR}. - else - printf '%s\n' "$(cd "${MYDIR}/.." 
&& shellcheck -a -x -o all -f gcc protocols/*.sh)" - fi + export SHELLCHECK_OPTS="${SHELLCHECK_OPTS} -e SC2312" fi +shellcheck -a -x -o all -f "${format}" "${MYDIR}"/../protocols/*.sh | sed "s|${MYDIR}/../||g" \ No newline at end of file From c451e109fdd00169374e3c5e1b134f9e79f38bd2 Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Mon, 19 Jun 2023 22:16:11 +0200 Subject: [PATCH 41/42] Trigger Jenkins. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6744355..bfe8604 100644 --- a/README.md +++ b/README.md @@ -99,3 +99,4 @@ Navigate to jobs folder. The location of the jobs folder will be outputted at th ```BASH bash submit.sh ``` + From a290e9a1b94bdaa7ac3c4303fcec9deb8dee4ade Mon Sep 17 00:00:00 2001 From: Pieter Neerincx Date: Tue, 20 Jun 2023 14:33:39 +0200 Subject: [PATCH 42/42] Removed Calculon from checkEnvironment.sh. --- checkEnvironment.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/checkEnvironment.sh b/checkEnvironment.sh index f05d6d2..d8f8a64 100755 --- a/checkEnvironment.sh +++ b/checkEnvironment.sh @@ -16,12 +16,8 @@ elif [ "${HOST}" == "leucine-zipper.gcc.rug.nl" ] then ENVIRONMENT_PARAMETERS="leucine-zipper" TMPDIR="tmp06" -elif [ "${HOST}" == "calculon" ] -then - ENVIRONMENT_PARAMETERS="calculon" - TMPDIR="tmp04" else - echo "Unknown host: running is only possible on calculon, zinc-finger or leucine-zipper." + echo "Unknown host: running is only possible on zinc-finger or leucine-zipper." fi THISDIR=$(pwd)