diff --git a/packages/2016_Broushaki_Science/2016_Broushaki_Science.config b/packages/2016_Broushaki_Science/2016_Broushaki_Science.config new file mode 100644 index 0000000..54a63a0 --- /dev/null +++ b/packages/2016_Broushaki_Science/2016_Broushaki_Science.config @@ -0,0 +1,41 @@ +// Keep track of config versions +minotaur_release='0.5.0' // The release tag of the poseidon-eager repository used for processing and config file retrieval +config_template_version='0.5.0' +package_config_version='0.5.0' +minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf" + +// This configuration file is designed to be used with the nf-core/eager pipeline. +// Instead of having to specify all other configurations for the Minotaur pipeline +// at runtime, they are all contained in this file and loaded automatically upon +// specifying this config file during runtime. Additionally, any parameters that +// need to be altered from the defaults can be specified here. +// +// The intention is to make it easy for users to understand and reproduce the output +// of processing with the Minotaur workflow from the contents of a +// single file. + +// Load configuration profiles. They are loaded from the minotaur_config_base URL, which is pinned to the minotaur_release tag. +includeConfig "${minotaur_config_base}/EVA_cluster.config" // Cluster-specific configurations for nf-core/eager execution at MPI-EVA +includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing. + +// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping. +// TODO: Select the appropriate config for the CaptureType of the package. 
+includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config" + +params { + // Keep track of config file versions used when processing + config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}" + config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)" + + /* + TODO: If you need to change any of the default processing parameters for this package + you can specify these parameters below. + Any parameters not specified in any of the config files default to their nf-core/eager default values. + + For information on all available parameters and their default values see: + https://nf-co.re/eager/2.5.1/parameters + + You can see the latest default values for parameters within poseidon-eager at: + https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config + */ +} diff --git a/packages/2016_Broushaki_Science/2016_Broushaki_Science.ssf b/packages/2016_Broushaki_Science/2016_Broushaki_Science.ssf new file mode 100644 index 0000000..018f193 --- /dev/null +++ b/packages/2016_Broushaki_Science/2016_Broushaki_Science.ssf @@ -0,0 +1,6 @@ +poseidon_IDs udg library_built notes run_accession study_accession sample_accession sample_alias secondary_sample_accession first_public last_updated instrument_model library_layout library_source instrument_platform library_name library_strategy fastq_ftp fastq_aspera fastq_bytes fastq_md5 read_count submitted_ftp submitted_md5 +AH1 minus ds n/a ERR1427797 PRJEB14180 SAMEA4015206 AH1 ERS1186316 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/007/ERR1427797/ERR1427797.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/007/ERR1427797/ERR1427797.fastq.gz 3206724794 a18da14f96ceb4f60c740d5608bdceb4 67506999 
ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427797/AH1.all_lib.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427797/AH1.all_lib.Mkdup.len.realg.bam.bai 3458ae5f33f0d79e57b504d46b5d9e96;3e6a63bbbacb41d2c675ee3887ab3b8c +AH2 minus ds n/a ERR1427798 PRJEB14180 SAMEA4015207 AH2 ERS1186317 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/008/ERR1427798/ERR1427798.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/008/ERR1427798/ERR1427798.fastq.gz 1632005812 af98dcf94b55a1deffc34e8cb2ee719a 34394545 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427798/AH2.all_lib.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427798/AH2.all_lib.Mkdup.len.realg.bam.bai 7ce1897d3f9deeeb4106dc23309951f9;2f1b18d63d2f0f334d5c4b15370219a2 +AH4 minus ds n/a ERR1427799 PRJEB14180 SAMEA4015208 AH4 ERS1186318 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/009/ERR1427799/ERR1427799.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/009/ERR1427799/ERR1427799.fastq.gz 2393087748 ea1ed117e1a5ff0b345b0920f3b999f2 50397195 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427799/AH4.all_lib.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427799/AH4.all_lib.Mkdup.len.realg.bam.bai 7eed5cda5fa6cb0c7652b15093442354;4077b41d4b4530f447557f92a88917e9 +F38 minus ds n/a ERR1427822 PRJEB14180 SAMEA4015209 F38 ERS1186319 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/002/ERR1427822/ERR1427822.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/002/ERR1427822/ERR1427822.fastq.gz 3897149735 9b75054007eeca140aed003b1c524db5 68748753 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427822/F38.all_realn.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427822/F38.all_realn.bam.bai 159266dfea8bc6aaa4f9e5a76c9fc9f6;081b081ca9f9f8e52c5ddc616ad583bd +WC1 minus ds n/a ERR1427821 PRJEB14180 SAMEA4015210 WC1 ERS1186320 2016-07-15 
2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/001/ERR1427821/ERR1427821.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/001/ERR1427821/ERR1427821.fastq.gz 17945861342 0b8f4965bb9cb3893f83c21ed6f60e82 326262818 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427821/WC1.all_SG_join.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427821/WC1.all_SG_join.Mkdup.len.realg.bam.bai 968c5f3b0272a8be133bf9c884afd5c2;736ddbd6cd69e0b74caae2602c90bbd7 diff --git a/packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv b/packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv new file mode 100644 index 0000000..2d1cb56 --- /dev/null +++ b/packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv @@ -0,0 +1,6 @@ +Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file BAM_target +AH1 AH1_unspecified 1 4 SE Homo sapiens (modern human) double none /AH1_unspecified_L1_R1.fastq.gz NA NA ERR1427797.fastq.gz NA NA +AH2 AH2_unspecified 1 4 SE Homo sapiens (modern human) double none /AH2_unspecified_L1_R1.fastq.gz NA NA ERR1427798.fastq.gz NA NA +AH4 AH4_unspecified 1 4 SE Homo sapiens (modern human) double none /AH4_unspecified_L1_R1.fastq.gz NA NA ERR1427799.fastq.gz NA NA +F38 F38_unspecified 1 4 SE Homo sapiens (modern human) double none /F38_unspecified_L1_R1.fastq.gz NA NA ERR1427822.fastq.gz NA NA +WC1 WC1_unspecified 1 4 SE Homo sapiens (modern human) double none /WC1_unspecified_L1_R1.fastq.gz NA NA ERR1427821.fastq.gz NA NA diff --git a/packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv_patch.sh b/packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv_patch.sh new file mode 100755 index 0000000..a73f9af --- /dev/null +++ b/packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv_patch.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -uo pipefail ## Pipefail, complain on new unassigned variables. 
+
+## Track the version of the TSV_patch template used
+VERSION='0.2.1dev'
+
+## This script is applied to the eager input TSV file locally to edit the dummy
+## path to the fastQ files added by `create_eager_input.sh` to a real local
+## path provided as a positional argument. Any further local tweaks to the
+## TSV before running eager should be added below that in the form of bash
+## commands to aid in reproducibility.
+
+## usage tsv_patch.sh <local_data_dir> <input_tsv> <path/to/source_me.sh>
+
+local_data_dir="$(readlink -f "${1}")" ## 1st argument: local directory holding the downloaded data
+input_tsv="$(readlink -f "${2}")" ## 2nd argument: eager input TSV to finalise
+output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv" ## written next to (not inside) the data directory
+columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
+source "$(readlink -f "${3}")" ## Path to helper function script should be provided as 3rd argument. https://github.com/poseidon-framework/poseidon-eager/blob/main/scripts/source_me.sh
+
+## Index the columns to keep by their position in the TSV header; all other columns are excluded from the finalised TSV
+cut_selector=''
+tsv_header=($(head -n1 "${input_tsv}")) ## Deliberately unquoted: split the header line into one array element per column name
+for col_name in "${columns_to_keep[@]}"; do
+  idx=$(get_index_of "${col_name}" "${tsv_header[@]}") ## Position in the header. NOTE(review): assumes get_index_of prints a 0-based index, or -1 when absent - confirm in source_me.sh
+  if [[ ${idx} -ne -1 ]]; then ## Test the sentinel BEFORE shifting to 1-based, otherwise the check can never fire
+    cut_selector+="$((idx + 1))," ## cut uses 1-based indexing
+  fi
+done
+
+## Remove added columns (cut always emits the selected fields in their input-file order), then swap the dummy path for the local one
+cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"
+sed -i -e "s|<PATH_TO_DATA>|${local_data_dir}|g" "${output_tsv}" ## NOTE(review): placeholder assumed to be the dummy prefix written by create_eager_input.sh - confirm
+
+## Any further commands to edit the file before finalisation should be added below as shown
+# sed -ie 's/replace_this/with_this/g' ${output_tsv}
+
+## Keep track of versions
+version_file="$(dirname "${input_tsv}")/script_versions.txt"
+## Remove versions from older run if there
+grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" >"${version_file}.new"
+## Then add new versions
+echo -e "$(basename "${0}"):\t${VERSION}" >> "${version_file}.new"
+echo -e "source_me.sh for final TSV:\t${HELPER_FUNCTION_VERSION}" >>"${version_file}.new" ## HELPER_FUNCTION_VERSION is not set in this script; presumably defined by the sourced helper
+mv "${version_file}.new" "${version_file}"
diff --git a/packages/2016_Broushaki_Science/script_versions.txt b/packages/2016_Broushaki_Science/script_versions.txt
new file mode 100644
index 0000000..13ace45
--- /dev/null
+++ b/packages/2016_Broushaki_Science/script_versions.txt
@@ -0,0 +1,2 @@
+create_eager_input.sh: 0.5.1
+source_me.sh for initial TSV: 0.5.2