Merge pull request #63 from Kavlahkaff/2022_CampeloDosSantos_Brazil

Add package: 2022_CampeloDosSantos_Brazil
poseidon-framework · Feb 20, 2025 · ee52c6a · ee52c6a
2 parents 24fc77e + 0fec61d
commit ee52c6a
Show file tree

Hide file tree

Showing 5 changed files with 94 additions and 0 deletions.
diff --git a/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.config b/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.config
@@ -0,0 +1,41 @@
+// Keep track of config versions
+minotaur_release='0.4.0dev' // The release tag of the poseidon-eager repository used for processing and config file retrieval
+config_template_version='0.3.0dev'
+package_config_version='0.3.0dev'
+minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf"
+
+// This configuration file is designed to be used with the nf-core/eager pipeline.
+//   Instead of having to specify all other configurations for the Minotaur pipeline
+//   at runtime, they are all contained in this file and loaded automatically upon
+//   specifying this config file at runtime. Additionally, any parameters that
+//   need to be altered from the defaults can be specified here.
+//
+// The intention is to make it easy for users to understand and reproduce the output
+//  of processing with the Minotaur workflow from the contents of a
+//  single file.
+
+// Load configuration profiles. They are loaded from the minotaur_config_base URL, which points at the minotaur_release tag set above.
+includeConfig "${minotaur_config_base}/EVA_cluster.config" // Cluster-specific configurations for nf-core/eager execution at MPI-EVA
+includeConfig "${minotaur_config_base}/Minotaur.config"    // Default nf-core/eager parameters for Minotaur processing.
+
+// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping.
+// TODO: Select the appropriate config for the CaptureType of the package. NOTE(review): this TODO is still open; the package's .ssf lists library_strategy WGS, so confirm 1240K is the intended profile.
+includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config"
+
+params {
+  // Keep track of config file versions used when processing: both versions are appended to the existing profile description.
+  config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}"
+  config_profile_contact     = "Thiseas C. Lamnidis (@TCLamnidis)"
+
+  /* 
+  TODO: If you need to change any of the default processing parameters for this package
+  you can specify these parameters below.
+  Any parameters not specified in any of the config files default to their nf-core/eager default values.
+
+  For information on all available parameters and their default values see: 
+    https://nf-co.re/eager/2.5.1/parameters
+
+  You can see the latest default values for parameters within poseidon-eager at:
+    https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config
+  */
+}
diff --git a/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.ssf b/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.ssf
@@ -0,0 +1,3 @@
+poseidon_IDs	udg	library_built	notes	run_accession	study_accession	sample_accession	sample_alias	secondary_sample_accession	first_public	last_updated	instrument_model	library_layout	library_source	instrument_platform	library_name	library_strategy	fastq_ftp	fastq_aspera	fastq_bytes	fastq_md5	read_count	submitted_ftp
+Brazil-2	minus	ds	Data are a mix of UDG-treated and untreated libraries. Setting udg to "minus".	SRR21678399	PRJNA883375	SAMN30963978	Brazil-2	SRS15208711	2024-02-25	2024-02-26	Illumina NovaSeq 6000	PAIRED	GENOMIC	ILLUMINA	Brazil-2	WGS	ftp.sra.ebi.ac.uk/vol1/fastq/SRR216/099/SRR21678399/SRR21678399.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR216/099/SRR21678399/SRR21678399_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR216/099/SRR21678399/SRR21678399_2.fastq.gz	fasp.sra.ebi.ac.uk:/vol1/fastq/SRR216/099/SRR21678399/SRR21678399.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR216/099/SRR21678399/SRR21678399_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR216/099/SRR21678399/SRR21678399_2.fastq.gz	135992302;9614632412;9590954290	b92d349748d7d1eb50c468723a027d52;831af85710d7f2fcd090768aaa8d510b;d1cbc483b47d39866e5d899d5a347c40	550795936	n/a
+Brazil-12	minus	ds	Data are a mix of UDG-treated and untreated libraries. Setting udg to "minus".	SRR21678398	PRJNA883375	SAMN30963979	Brazil-12	SRS15208712	2024-02-25	2024-02-25	Illumina NovaSeq 6000	PAIRED	GENOMIC	ILLUMINA	Brazil-12	WGS	ftp.sra.ebi.ac.uk/vol1/fastq/SRR216/098/SRR21678398/SRR21678398.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR216/098/SRR21678398/SRR21678398_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR216/098/SRR21678398/SRR21678398_2.fastq.gz	fasp.sra.ebi.ac.uk:/vol1/fastq/SRR216/098/SRR21678398/SRR21678398.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR216/098/SRR21678398/SRR21678398_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR216/098/SRR21678398/SRR21678398_2.fastq.gz	124480192;10744951288;10729366448	e1cc73c2a3b48fdb9fea4fed7c4d491c;204c232258300c0c9fa3ae63814df1b2;c6d6fd5a74c9959fceef838ecbd6a72e	584297384	n/a
diff --git a/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.tsv b/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.tsv
@@ -0,0 +1,3 @@
+Sample_Name	Library_ID	Lane	Colour_Chemistry	SeqType	Organism	Strandedness	UDG_Treatment	R1	R2	BAM	R1_target_file	R2_target_file
+Brazil-2	Brazil-2_Brazil-2	1	4	SE	Homo sapiens (modern human)	double	none	<PATH_TO_DATA>/Brazil-2_Brazil-2_L1_R1.fastq.gz	NA	NA	SRR21678399.fastq.gz	NA
+Brazil-12	Brazil-12_Brazil-12	1	4	SE	Homo sapiens (modern human)	double	none	<PATH_TO_DATA>/Brazil-12_Brazil-12_L1_R1.fastq.gz	NA	NA	SRR21678398.fastq.gz	NA
diff --git a/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.tsv_patch.sh b/packages/2022_CampeloDosSantos_Brazil/2022_CampeloDosSantos_Brazil.tsv_patch.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+set -uo pipefail ## Pipefail, and complain on use of unassigned variables.
+
+## Track the version of the TSV_patch template used
+VERSION='0.2.1dev'
+
+## This script is applied to the eager input TSV file locally to edit the dummy
+##    path to the fastQ files added by `create_eager_input.sh` to a real local
+##    path provided as a positional argument. Any further local tweaks to the
+##    TSV before running eager should be added below that in the form of bash
+##    commands to aid in reproducibility.
+##
+## Writes <input_tsv basename>.finalised.tsv into the parent directory of
+##    <local_data_dir>, and updates script_versions.txt next to <input_tsv>.
+
+## usage tsv_patch.sh <local_data_dir> <input_tsv> <path/to/source_me.sh>
+if [[ ${#} -lt 3 ]]; then
+  echo "usage: $(basename "${0}") <local_data_dir> <input_tsv> <path/to/source_me.sh>" >&2
+  exit 1
+fi
+
+local_data_dir="$(readlink -f "${1}")"
+input_tsv="$(readlink -f "${2}")"
+output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
+columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
+## Path to helper function script should be provided as 3rd argument. https://github.com/poseidon-framework/poseidon-eager/blob/main/scripts/source_me.sh
+##    It is expected to provide get_index_of and HELPER_FUNCTION_VERSION, which are used below.
+source "$(readlink -f "${3}")"
+
+## Index the columns to keep within the header of the input TSV, so that all
+##    other (added) columns are excluded from the finalised TSV.
+cut_selector=''
+IFS=$'\t' read -r -a tsv_header < <(head -n1 "${input_tsv}")
+for col_name in "${columns_to_keep[@]}"; do
+  ## get_index_of is assumed to print the 0-based index of its first argument within the remaining arguments.
+  idx="$(get_index_of "${col_name}" "${tsv_header[@]}")"
+  ## Anything that is not a non-negative integer (e.g. -1 or empty output) is treated as "column not found".
+  if [[ "${idx}" =~ ^[0-9]+$ ]]; then
+    cut_selector+="$((idx + 1))," ## cut uses 1-based indexing
+  else
+    echo "[$(basename "${0}")]: Column '${col_name}' not found in ${input_tsv}. Skipping." >&2
+  fi
+done
+
+if [[ -z "${cut_selector}" ]]; then
+  echo "[$(basename "${0}")]: None of the expected columns were found in ${input_tsv}." >&2
+  exit 1
+fi
+
+## Remove added columns. cut always prints fields in their input order, so columns are not reordered here.
+cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"
+## Escape characters that are special in a sed replacement (backslash, & and the | delimiter) before substituting.
+escaped_data_dir="$(printf '%s' "${local_data_dir}" | sed -e 's/[\\&|]/\\&/g')"
+sed -i -e "s|<PATH_TO_DATA>|${escaped_data_dir}|g" "${output_tsv}"
+
+## Any further commands to edit the file before finalisation should be added below as shown
+# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"
+
+## Keep track of versions
+version_file="$(dirname "${input_tsv}")/script_versions.txt"
+##    Remove versions from older run if there
+if [[ -f "${version_file}" ]]; then
+  grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
+else
+  : > "${version_file}.new"
+fi
+##    Then add new versions
+printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
+printf '%s:\t%s\n' "source_me.sh for final TSV" "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
+mv "${version_file}.new" "${version_file}"
diff --git a/packages/2022_CampeloDosSantos_Brazil/script_versions.txt b/packages/2022_CampeloDosSantos_Brazil/script_versions.txt
@@ -0,0 +1,2 @@
+create_eager_input.sh:	0.2.1dev
+source_me.sh for initial TSV:	0.2.3dev
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		create_eager_input.sh: 0.2.1dev
		source_me.sh for initial TSV: 0.2.3dev