Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions packages/2016_Broushaki_Science/2016_Broushaki_Science.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Keep track of config versions
minotaur_release='0.5.0' // The release tag of the poseidon-eager repository used for processing and config file retrieval
config_template_version='0.5.0'
package_config_version='0.5.0'
minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf"

// This configuration file is designed to be used with the nf-core/eager pipeline.
// Instead of having to specify all other configurations for the Minotaur pipeline
// at runtime, they are all contained in this file and loaded automatically when
// this config file is specified at runtime. Additionally, any parameters that
// need to be altered from the defaults can be specified here.
//
// The intention is to make it easy for users to understand and reproduce the
// output of Minotaur workflow processing from the contents of a single file.

// Load configuration profiles. They are loaded from the minotaur_config_base URL,
// i.e. from the poseidon-eager repository at the tag given by minotaur_release (not from the main branch).
includeConfig "${minotaur_config_base}/EVA_cluster.config" // Cluster-specific configurations for nf-core/eager execution at MPI-EVA
includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing.

// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping.
// TODO: Select the appropriate config for the CaptureType of the package.
// NOTE(review): the 1240K profile is selected here, while the accompanying .ssf lists
// library_strategy "WGS" for every run -- confirm that 1240K is the intended CaptureType profile.
includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config"

params {
// Keep track of config file versions used when processing.
// The version strings are appended to the existing config_profile_description, one per line.
config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}"
config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)"

/*
TODO: If you need to change any of the default processing parameters for this package,
you can specify these parameters below.
Any parameters not specified in any of the config files default to their nf-core/eager default values.

For information on all available parameters and their default values see:
https://nf-co.re/eager/2.5.1/parameters

You can see the latest default values for parameters within poseidon-eager at:
https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config
*/
}
6 changes: 6 additions & 0 deletions packages/2016_Broushaki_Science/2016_Broushaki_Science.ssf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
poseidon_IDs udg library_built notes run_accession study_accession sample_accession sample_alias secondary_sample_accession first_public last_updated instrument_model library_layout library_source instrument_platform library_name library_strategy fastq_ftp fastq_aspera fastq_bytes fastq_md5 read_count submitted_ftp submitted_md5
AH1 minus ds n/a ERR1427797 PRJEB14180 SAMEA4015206 AH1 ERS1186316 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/007/ERR1427797/ERR1427797.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/007/ERR1427797/ERR1427797.fastq.gz 3206724794 a18da14f96ceb4f60c740d5608bdceb4 67506999 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427797/AH1.all_lib.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427797/AH1.all_lib.Mkdup.len.realg.bam.bai 3458ae5f33f0d79e57b504d46b5d9e96;3e6a63bbbacb41d2c675ee3887ab3b8c
AH2 minus ds n/a ERR1427798 PRJEB14180 SAMEA4015207 AH2 ERS1186317 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/008/ERR1427798/ERR1427798.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/008/ERR1427798/ERR1427798.fastq.gz 1632005812 af98dcf94b55a1deffc34e8cb2ee719a 34394545 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427798/AH2.all_lib.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427798/AH2.all_lib.Mkdup.len.realg.bam.bai 7ce1897d3f9deeeb4106dc23309951f9;2f1b18d63d2f0f334d5c4b15370219a2
AH4 minus ds n/a ERR1427799 PRJEB14180 SAMEA4015208 AH4 ERS1186318 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/009/ERR1427799/ERR1427799.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/009/ERR1427799/ERR1427799.fastq.gz 2393087748 ea1ed117e1a5ff0b345b0920f3b999f2 50397195 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427799/AH4.all_lib.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427799/AH4.all_lib.Mkdup.len.realg.bam.bai 7eed5cda5fa6cb0c7652b15093442354;4077b41d4b4530f447557f92a88917e9
F38 minus ds n/a ERR1427822 PRJEB14180 SAMEA4015209 F38 ERS1186319 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/002/ERR1427822/ERR1427822.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/002/ERR1427822/ERR1427822.fastq.gz 3897149735 9b75054007eeca140aed003b1c524db5 68748753 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427822/F38.all_realn.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427822/F38.all_realn.bam.bai 159266dfea8bc6aaa4f9e5a76c9fc9f6;081b081ca9f9f8e52c5ddc616ad583bd
WC1 minus ds n/a ERR1427821 PRJEB14180 SAMEA4015210 WC1 ERS1186320 2016-07-15 2018-11-16 Illumina HiSeq 2500 SINGLE METAGENOMIC ILLUMINA unspecified WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR142/001/ERR1427821/ERR1427821.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR142/001/ERR1427821/ERR1427821.fastq.gz 17945861342 0b8f4965bb9cb3893f83c21ed6f60e82 326262818 ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427821/WC1.all_SG_join.Mkdup.len.realg.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR142/ERR1427821/WC1.all_SG_join.Mkdup.len.realg.bam.bai 968c5f3b0272a8be133bf9c884afd5c2;736ddbd6cd69e0b74caae2602c90bbd7
6 changes: 6 additions & 0 deletions packages/2016_Broushaki_Science/2016_Broushaki_Science.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file BAM_target
AH1 AH1_unspecified 1 4 SE Homo sapiens (modern human) double none <PATH_TO_DATA>/AH1_unspecified_L1_R1.fastq.gz NA NA ERR1427797.fastq.gz NA NA
AH2 AH2_unspecified 1 4 SE Homo sapiens (modern human) double none <PATH_TO_DATA>/AH2_unspecified_L1_R1.fastq.gz NA NA ERR1427798.fastq.gz NA NA
AH4 AH4_unspecified 1 4 SE Homo sapiens (modern human) double none <PATH_TO_DATA>/AH4_unspecified_L1_R1.fastq.gz NA NA ERR1427799.fastq.gz NA NA
F38 F38_unspecified 1 4 SE Homo sapiens (modern human) double none <PATH_TO_DATA>/F38_unspecified_L1_R1.fastq.gz NA NA ERR1427822.fastq.gz NA NA
WC1 WC1_unspecified 1 4 SE Homo sapiens (modern human) double none <PATH_TO_DATA>/WC1_unspecified_L1_R1.fastq.gz NA NA ERR1427821.fastq.gz NA NA
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -uo pipefail ## Pipefail, complain on new unassigned variables.

## Track the version of the TSV_patch template used
VERSION='0.2.1dev'

## This script is applied to the eager input TSV file locally to replace the dummy
## path to the FastQ files (the `<PATH_TO_DATA>` placeholder added by
## `create_eager_input.sh`) with a real local path provided as a positional
## argument. Any further local tweaks to the TSV before running eager should be
## added below that in the form of bash commands to aid in reproducibility.
##
## Arguments:
##   $1 - local data directory that replaces `<PATH_TO_DATA>`
##   $2 - eager input TSV to patch (tab-separated, with a header line)
##   $3 - path to the helper function script (source_me.sh), see
##        https://github.com/poseidon-framework/poseidon-eager/blob/main/scripts/source_me.sh
## Outputs:
##   <parent of local_data_dir>/<input_tsv basename>.finalised.tsv
##   script_versions.txt next to the input TSV is updated with the versions used.

## usage tsv_patch.sh <local_data_dir> <input_tsv> <path/to/source_me.sh>
if [[ $# -lt 3 ]]; then
  printf 'usage: %s <local_data_dir> <input_tsv> <path/to/source_me.sh>\n' "$(basename "${0}")" >&2
  exit 1
fi

local_data_dir="$(readlink -f "${1}")"
input_tsv="$(readlink -f "${2}")"
output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
## The helper script is expected to provide `get_index_of` and `HELPER_FUNCTION_VERSION`.
# shellcheck source=/dev/null
source "$(readlink -f "${3}")"

## Index non-proliferated columns and exclude them from the finalised TSV.
## The header is split on tabs only, matching the delimiter `cut -f` uses below.
cut_selector=''
tsv_header=()
IFS=$'\t' read -r -a tsv_header < "${input_tsv}"
for col_name in "${columns_to_keep[@]}"; do
  ## Look the column up in the header of the input TSV (not in the list of
  ## wanted columns), so the selection is correct for any column layout.
  ## Assumes get_index_of prints the 0-based position of the first argument
  ## within the remaining arguments -- TODO confirm against source_me.sh.
  zero_based_idx="$(get_index_of "${col_name}" "${tsv_header[@]}")"
  ## Only a non-negative integer counts as "found"; this rejects both a -1
  ## sentinel and empty output before the shift to cut's 1-based numbering.
  if [[ "${zero_based_idx}" =~ ^[0-9]+$ ]]; then
    cut_selector+="$((zero_based_idx + 1)),"
  fi
done

if [[ -z "${cut_selector}" ]]; then
  printf 'ERROR: none of the expected columns were found in the header of %s\n' "${input_tsv}" >&2
  exit 1
fi

## Remove added columns. Note that `cut` always emits the selected fields in
## the order they appear in the input file; it does not reorder them.
cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"
## `|` is the sed delimiter here, so local_data_dir must not contain `|` or `&`.
sed -i -e "s|<PATH_TO_DATA>|${local_data_dir}|g" "${output_tsv}"

## Any further commands to edit the file before finalisation should be added below as shown
# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"

## Keep track of versions
version_file="$(dirname "${input_tsv}")/script_versions.txt"
## Remove versions from older run if there
if [[ -f "${version_file}" ]]; then
  grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
else
  : > "${version_file}.new"
fi
## Then add new versions
printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
printf 'source_me.sh for final TSV:\t%s\n' "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
mv "${version_file}.new" "${version_file}"
2 changes: 2 additions & 0 deletions packages/2016_Broushaki_Science/script_versions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
create_eager_input.sh: 0.5.1
source_me.sh for initial TSV: 0.5.2