Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions packages/2024_Higgins_MuraUP/2024_Higgins_MuraUP.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Keep track of config versions
minotaur_release='1.0.0' // The release tag of the poseidon-eager repository used for processing and config file retrieval
config_template_version='1.0.0'
package_config_version='1.0.0'
// Base URL of the conf/ directory of poseidon-eager at the release tag set in minotaur_release.
minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf"

// This configuration file is designed to be used with the nf-core/eager pipeline.
// Instead of having to specify all other configurations for the Minotaur pipeline
// on runtime, they are all contained in this file and loaded automatically upon
// specifying this config file during runtime. Additionally, any parameters that
// need to be altered from the defaults can be specified here.
//
// The intention is to make it easy for users to understand and reproduce the output
// from processing with the Minotaur workflow from the contents of a
// single file.

// Load configuration profiles. They are loaded from the minotaur_config_base URL, i.e. from the
// poseidon-eager release tag given by minotaur_release above (not from the main branch).
// The loaded config includes code that loads the institutional configs from https://github.com/poseidon-framework/minotaur-institutional-configs.
includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing.

// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping.
// TODO: Select the appropriate config for the CaptureType of the package.
includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config"

params {
// Keep track of config file versions used when processing.
// The template and package config versions are appended to the existing
// config_profile_description (presumably set by the configs included above -- confirm).
config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}"
config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)"

/*
TODO: If you need to change any of the default processing parameters for this package
you can specify these parameters below.
Any parameters not specified in any of the config files default to their nf-core/eager default values.

For information on all available parameters and their default values see:
https://nf-co.re/eager/2.5.1/parameters

You can see the latest default values for parameters within poseidon-eager at:
https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config
*/
}
2 changes: 2 additions & 0 deletions packages/2024_Higgins_MuraUP/2024_Higgins_MuraUP.ssf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
poseidon_IDs udg library_built notes run_accession study_accession sample_accession sample_alias secondary_sample_accession first_public last_updated instrument_model library_layout library_source instrument_platform library_name library_strategy fastq_ftp fastq_aspera fastq_bytes fastq_md5 read_count submitted_ftp submitted_md5
LeMura1 half ds n/a ERR12071704 PRJEB66279 SAMEA114405814 Mura_1 ERS16394488 2024-06-26 2024-06-26 Illumina NovaSeq 6000 SINGLE GENOMIC ILLUMINA Mura WGS ftp.sra.ebi.ac.uk/vol1/fastq/ERR120/004/ERR12071704/ERR12071704.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR120/004/ERR12071704/ERR12071704.fastq.gz 71969931006 d11ef194d7f7569602e9ee58bc9da5f3 2854042970 ftp.sra.ebi.ac.uk/vol1/run/ERR120/ERR12071704/Mura.merged.mapped.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR120/ERR12071704/Mura.merged.mapped.bam.bai 7af7614fbd5b0cb9b8f451d35bf221d4;34006d15246589588e04c04389bebce3
2 changes: 2 additions & 0 deletions packages/2024_Higgins_MuraUP/2024_Higgins_MuraUP.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file BAM_target
LeMura1 LeMura1_Mura 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/LeMura1_Mura_L1_R1.fastq.gz NA NA ERR12071704.fastq.gz NA NA
45 changes: 45 additions & 0 deletions packages/2024_Higgins_MuraUP/2024_Higgins_MuraUP.tsv_patch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -uo pipefail ## Pipefail, complain on new unassigned variables.

## Track the version of the TSV_patch template used
VERSION='0.2.1dev'

## This script is applied to the eager input TSV file locally to edit the dummy
##   path to the fastQ files added by `create_eager_input.sh` to a real local
##   path provided as a positional argument. Any further local tweaks to the
##   TSV before running eager should be added below that in the form of bash
##   commands to aid in reproducibility.
##
## Arguments:
##   $1 - local directory holding the downloaded data; replaces <PATH_TO_DATA>.
##   $2 - eager input TSV to finalise.
##   $3 - path to the helper function script (source_me.sh), which is expected
##        to provide `get_index_of` and `HELPER_FUNCTION_VERSION`.
## Outputs:
##   - <parent dir of $1>/<basename of $2 without .tsv>.finalised.tsv
##   - script_versions.txt next to $2 is updated with the versions used.

## usage tsv_patch.sh <local_data_dir> <input_tsv> <path/to/source_me.sh>

## Fail with a readable message instead of an "unbound variable" error.
if [[ $# -lt 3 ]]; then
  printf 'usage: %s <local_data_dir> <input_tsv> <path/to/source_me.sh>\n' "$(basename "${0}")" >&2
  exit 1
fi

local_data_dir="$(readlink -f "${1}")"
input_tsv="$(readlink -f "${2}")"
output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
# shellcheck source=/dev/null
source "$(readlink -f "${3}")" ## Path to helper function script should be provided as 3rd argument. https://github.com/poseidon-framework/poseidon-eager/blob/main/scripts/source_me.sh

## Index non-proliferated columns and exclude them from the finalised TSV
cut_selector=''
## Column names contain no whitespace, so splitting the header line on IFS is safe.
read -r -a tsv_header < <(head -n1 "${input_tsv}")
for col_name in "${columns_to_keep[@]}"; do
  ## Look the column up in the actual TSV header (not in columns_to_keep), so the
  ##   selector stays correct even if the input columns are laid out differently.
  ## NOTE(review): assumes get_index_of prints the 0-based index of the match on
  ##   stdout; anything that is not a non-negative integer is treated as "not found".
  raw_idx="$(get_index_of "${col_name}" "${tsv_header[@]}")"
  if [[ "${raw_idx}" =~ ^[0-9]+$ ]]; then
    idx=$((raw_idx + 1)) ## cut uses 1-based field numbers
    cut_selector+="${idx},"
  else
    printf 'WARNING: column "%s" not found in header of %s. Skipping.\n' "${col_name}" "${input_tsv}" >&2
  fi
done

## `cut -f ''` is an error, so stop early if nothing could be selected.
if [[ -z "${cut_selector}" ]]; then
  printf 'ERROR: none of the expected columns were found in the header of %s\n' "${input_tsv}" >&2
  exit 1
fi

## Remove added columns. Note that cut always emits the selected fields in
##   the order they appear in the input, regardless of the order in the list.
cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"

## Escape characters that are special in the replacement part of the sed
##   expression below ('&', the '|' delimiter, and backslash).
escaped_data_dir="$(printf '%s' "${local_data_dir}" | sed -e 's/[&|\\]/\\&/g')"
sed -i -e "s|<PATH_TO_DATA>|${escaped_data_dir}|g" "${output_tsv}"

## Any further commands to edit the file before finalisation should be added below as shown
# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"

## Keep track of versions
version_file="$(dirname "${input_tsv}")/script_versions.txt"
## Remove versions from older run if there
if [[ -f "${version_file}" ]]; then
  grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
else
  : > "${version_file}.new"
fi
## Then add new versions
printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
printf '%s:\t%s\n' "source_me.sh for final TSV" "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
mv "${version_file}.new" "${version_file}"
2 changes: 2 additions & 0 deletions packages/2024_Higgins_MuraUP/script_versions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
create_eager_input.sh: 0.5.1
source_me.sh for initial TSV: 0.5.2