Skip to content

Commit

Permalink
[automated] Create package recipe from SSF files
Browse files Browse the repository at this point in the history
  • Loading branch information
delphis-bot committed Jul 18, 2024
1 parent cb875cb commit 065c626
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 0 deletions.
40 changes: 40 additions & 0 deletions packages/2023_Salazar_MachuPicchu/2023_Salazar_MachuPicchu.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Keep track of config versions
config_template_version='0.3.0dev'
package_config_version='0.3.0dev'
// Base URL from which the shared config files included below are loaded.
minotaur_config_base='https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/main/conf'

// This configuration file is designed to be used with the nf-core/eager pipeline.
// Instead of having to specify all other configurations for the Minotaur pipeline
// at runtime, they are all contained in this file and loaded automatically when
// this config file is specified at runtime. Additionally, any parameters that
// need to be altered from the defaults can be specified here.
//
// The intention is to make it easy for users to understand and reproduce the output
// of processing with the Minotaur workflow from the contents of a single file.

// Load configuration profiles. They are loaded from the minotaur_config_base URL, main branch.
includeConfig "${minotaur_config_base}/EVA_cluster.config" // Cluster-specific configurations for nf-core/eager execution at MPI-EVA
includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing.

// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping.
// TODO: Select the appropriate config for the CaptureType of the package.
includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config"

params {
// Keep track of config file versions used when processing.
// The existing profile description is kept and both version strings are appended to it.
config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}"
config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)"

/*
TODO: If you need to change any of the default processing parameters for this package,
you can specify these parameters below.
Any parameters not specified in any of the config files default to their nf-core/eager default values.

For information on all available parameters and their default values see:
https://nf-co.re/eager/2.4.6/parameters

You can see the default values for parameters within poseidon-eager at:
https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config
*/
}
57 changes: 57 additions & 0 deletions packages/2023_Salazar_MachuPicchu/2023_Salazar_MachuPicchu.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file
KCA-3089 KCA-3089_KCA-3089 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KCA-3089_KCA-3089_L1_R1.fastq.gz NA NA ERR11518429.fastq.gz NA
KCA-1108 KCA-1108_KCA-1108 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KCA-1108_KCA-1108_L1_R1.fastq.gz NA NA ERR11518434.fastq.gz NA
KCA-1364 KCA-1364_KCA-1364 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KCA-1364_KCA-1364_L1_R1.fastq.gz NA NA ERR11518435.fastq.gz NA
MP13 MP13_MP13 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP13_MP13_L1_R1.fastq.gz <PATH_TO_DATA>/MP13_MP13_L1_R2.fastq.gz NA ERR11518444.fastq.gz ERR11518444_1.fastq.gz
KMA-28-1 KMA-28-1_KMA-28-1 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KMA-28-1_KMA-28-1_L1_R1.fastq.gz NA NA ERR11518463.fastq.gz NA
MP33 MP33_MP33 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP33_MP33_L1_R1.fastq.gz <PATH_TO_DATA>/MP33_MP33_L1_R2.fastq.gz NA ERR11518465.fastq.gz ERR11518465_1.fastq.gz
MP42B MP42B_MP42B 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP42B_MP42B_L1_R1.fastq.gz <PATH_TO_DATA>/MP42B_MP42B_L1_R2.fastq.gz NA ERR11518471.fastq.gz ERR11518471_1.fastq.gz
MP42A MP42A_MP42A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP42A_MP42A_L1_R1.fastq.gz <PATH_TO_DATA>/MP42A_MP42A_L1_R2.fastq.gz NA ERR11518472.fastq.gz ERR11518472_1.fastq.gz
MP48B MP48B_MP48B 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP48B_MP48B_L1_R1.fastq.gz <PATH_TO_DATA>/MP48B_MP48B_L1_R2.fastq.gz NA ERR11518477.fastq.gz ERR11518477_1.fastq.gz
MP50A MP50A_MP50A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP50A_MP50A_L1_R1.fastq.gz <PATH_TO_DATA>/MP50A_MP50A_L1_R2.fastq.gz NA ERR11518484.fastq.gz ERR11518484_1.fastq.gz
MP4f MP4f_MP4f 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP4f_MP4f_L1_R1.fastq.gz <PATH_TO_DATA>/MP4f_MP4f_L1_R2.fastq.gz NA ERR11518485.fastq.gz ERR11518485_1.fastq.gz
MP4i MP4i_MP4i 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP4i_MP4i_L1_R1.fastq.gz <PATH_TO_DATA>/MP4i_MP4i_L1_R2.fastq.gz NA ERR11518489.fastq.gz ERR11518489_1.fastq.gz
MP5A MP5A_MP5A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP5A_MP5A_L1_R1.fastq.gz <PATH_TO_DATA>/MP5A_MP5A_L1_R2.fastq.gz NA ERR11518495.fastq.gz ERR11518495_1.fastq.gz
MP65B MP65B_MP65B 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP65B_MP65B_L1_R1.fastq.gz <PATH_TO_DATA>/MP65B_MP65B_L1_R2.fastq.gz NA ERR11518496.fastq.gz ERR11518496_1.fastq.gz
MP27 MP27_MP27 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP27_MP27_L1_R1.fastq.gz <PATH_TO_DATA>/MP27_MP27_L1_R2.fastq.gz NA ERR11518499.fastq.gz ERR11518499_1.fastq.gz
OTT-1144 OTT-1144_OTT-1144 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/OTT-1144_OTT-1144_L1_R1.fastq.gz NA NA ERR11518523.fastq.gz NA
MP61 MP61_MP61 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP61_MP61_L1_R1.fastq.gz <PATH_TO_DATA>/MP61_MP61_L1_R2.fastq.gz NA ERR11518546.fastq.gz ERR11518546_1.fastq.gz
QKI-21-1 QKI-21-1_QKI-21-1 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/QKI-21-1_QKI-21-1_L1_R1.fastq.gz NA NA ERR11518553.fastq.gz NA
MP31A MP31A_MP31A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP31A_MP31A_L1_R1.fastq.gz <PATH_TO_DATA>/MP31A_MP31A_L1_R2.fastq.gz NA ERR11518458.fastq.gz ERR11518458_1.fastq.gz
KMA-15A-1 KMA-15A-1_KMA-15A-1 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KMA-15A-1_KMA-15A-1_L1_R1.fastq.gz NA NA ERR11518461.fastq.gz NA
MP45A MP45A_MP45A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP45A_MP45A_L1_R1.fastq.gz <PATH_TO_DATA>/MP45A_MP45A_L1_R2.fastq.gz NA ERR11518475.fastq.gz ERR11518475_1.fastq.gz
MP4E MP4E_MP4E 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP4E_MP4E_L1_R1.fastq.gz <PATH_TO_DATA>/MP4E_MP4E_L1_R2.fastq.gz NA ERR11518480.fastq.gz ERR11518480_1.fastq.gz
MP4D MP4D_MP4D 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP4D_MP4D_L1_R1.fastq.gz <PATH_TO_DATA>/MP4D_MP4D_L1_R2.fastq.gz NA ERR11518481.fastq.gz ERR11518481_1.fastq.gz
MP53A MP53A_MP53A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP53A_MP53A_L1_R1.fastq.gz <PATH_TO_DATA>/MP53A_MP53A_L1_R2.fastq.gz NA ERR11518486.fastq.gz ERR11518486_1.fastq.gz
MP55 MP55_MP55 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP55_MP55_L1_R1.fastq.gz <PATH_TO_DATA>/MP55_MP55_L1_R2.fastq.gz NA ERR11518487.fastq.gz ERR11518487_1.fastq.gz
MP78A MP78A_MP78A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP78A_MP78A_L1_R1.fastq.gz <PATH_TO_DATA>/MP78A_MP78A_L1_R2.fastq.gz NA ERR11518501.fastq.gz ERR11518501_1.fastq.gz
MP84A MP84A_MP84A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP84A_MP84A_L1_R1.fastq.gz <PATH_TO_DATA>/MP84A_MP84A_L1_R2.fastq.gz NA ERR11518514.fastq.gz ERR11518514_1.fastq.gz
QKI-809 QKI-809_QKI-809 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/QKI-809_QKI-809_L1_R1.fastq.gz NA NA ERR11518566.fastq.gz NA
QKI-9-3 QKI-9-3_QKI-9-3 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/QKI-9-3_QKI-9-3_L1_R1.fastq.gz NA NA ERR11518572.fastq.gz NA
SHN-575 SHN-575_SHN-575 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/SHN-575_SHN-575_L1_R1.fastq.gz NA NA ERR11518597.fastq.gz NA
KCA-1298 KCA-1298_KCA-1298 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KCA-1298_KCA-1298_L1_R1.fastq.gz NA NA ERR11518432.fastq.gz NA
CCA-7-2 CCA-7-2_CCA-7-2 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/CCA-7-2_CCA-7-2_L1_R1.fastq.gz NA NA ERR11518440.fastq.gz NA
MP107B MP107B_MP107B 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP107B_MP107B_L1_R1.fastq.gz <PATH_TO_DATA>/MP107B_MP107B_L1_R2.fastq.gz NA ERR11518443.fastq.gz ERR11518443_1.fastq.gz
MP23 MP23_MP23 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP23_MP23_L1_R1.fastq.gz <PATH_TO_DATA>/MP23_MP23_L1_R2.fastq.gz NA ERR11518448.fastq.gz ERR11518448_1.fastq.gz
KMA-2-2 KMA-2-2_KMA-2-2 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KMA-2-2_KMA-2-2_L1_R1.fastq.gz NA NA ERR11518456.fastq.gz NA
MP32 MP32_MP32 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP32_MP32_L1_R1.fastq.gz <PATH_TO_DATA>/MP32_MP32_L1_R2.fastq.gz NA ERR11518466.fastq.gz ERR11518466_1.fastq.gz
KMA-19-1 KMA-19-1_KMA-19-1 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/KMA-19-1_KMA-19-1_L1_R1.fastq.gz NA NA ERR11518467.fastq.gz NA
MP42C MP42C_MP42C 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP42C_MP42C_L1_R1.fastq.gz <PATH_TO_DATA>/MP42C_MP42C_L1_R2.fastq.gz NA ERR11518469.fastq.gz ERR11518469_1.fastq.gz
MP4B MP4B_MP4B 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP4B_MP4B_L1_R1.fastq.gz <PATH_TO_DATA>/MP4B_MP4B_L1_R2.fastq.gz NA ERR11518478.fastq.gz ERR11518478_1.fastq.gz
MP51 MP51_MP51 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP51_MP51_L1_R1.fastq.gz <PATH_TO_DATA>/MP51_MP51_L1_R2.fastq.gz NA ERR11518493.fastq.gz ERR11518493_1.fastq.gz
MP71 MP71_MP71 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP71_MP71_L1_R1.fastq.gz <PATH_TO_DATA>/MP71_MP71_L1_R2.fastq.gz NA ERR11518498.fastq.gz ERR11518498_1.fastq.gz
MP77A MP77A_MP77A 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP77A_MP77A_L1_R1.fastq.gz <PATH_TO_DATA>/MP77A_MP77A_L1_R2.fastq.gz NA ERR11518500.fastq.gz ERR11518500_1.fastq.gz
MP82 MP82_MP82 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP82_MP82_L1_R1.fastq.gz <PATH_TO_DATA>/MP82_MP82_L1_R2.fastq.gz NA ERR11518502.fastq.gz ERR11518502_1.fastq.gz
MP80 MP80_MP80 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP80_MP80_L1_R1.fastq.gz <PATH_TO_DATA>/MP80_MP80_L1_R2.fastq.gz NA ERR11518503.fastq.gz ERR11518503_1.fastq.gz
MP84C MP84C_MP84C 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP84C_MP84C_L1_R1.fastq.gz <PATH_TO_DATA>/MP84C_MP84C_L1_R2.fastq.gz NA ERR11518512.fastq.gz ERR11518512_1.fastq.gz
OTT-1160 OTT-1160_OTT-1160 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/OTT-1160_OTT-1160_L1_R1.fastq.gz NA NA ERR11518538.fastq.gz NA
OTT-1165 OTT-1165_OTT-1165 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/OTT-1165_OTT-1165_L1_R1.fastq.gz NA NA ERR11518543.fastq.gz NA
MP63 MP63_MP63 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP63_MP63_L1_R1.fastq.gz <PATH_TO_DATA>/MP63_MP63_L1_R2.fastq.gz NA ERR11518550.fastq.gz ERR11518550_1.fastq.gz
OTT-1174 OTT-1174_OTT-1174 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/OTT-1174_OTT-1174_L1_R1.fastq.gz NA NA ERR11518552.fastq.gz NA
QKI-4 QKI-4_QKI-4 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/QKI-4_QKI-4_L1_R1.fastq.gz NA NA ERR11518562.fastq.gz NA
OTT-1187 OTT-1187_OTT-1187 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/OTT-1187_OTT-1187_L1_R1.fastq.gz NA NA ERR11518565.fastq.gz NA
SHN-212 SHN-212_SHN-212 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/SHN-212_SHN-212_L1_R1.fastq.gz NA NA ERR11518568.fastq.gz NA
SHN-355 SHN-355_SHN-355 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/SHN-355_SHN-355_L1_R1.fastq.gz NA NA ERR11518578.fastq.gz NA
SHN-492 SHN-492_SHN-492 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/SHN-492_SHN-492_L1_R1.fastq.gz NA NA ERR11518582.fastq.gz NA
SHN-902 SHN-902_SHN-902 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/SHN-902_SHN-902_L1_R1.fastq.gz NA NA ERR11518584.fastq.gz NA
MP3a MP3a_MP3a 1 4 PE Homo sapiens (modern human) double half <PATH_TO_DATA>/MP3a_MP3a_L1_R1.fastq.gz <PATH_TO_DATA>/MP3a_MP3a_L1_R2.fastq.gz NA ERR11518632.fastq.gz ERR11518632_1.fastq.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -uo pipefail ## Pipefail, complain on new unassigned variables.

## Track the version of the TSV_patch template used
VERSION='0.2.0dev'

## This script is applied to the eager input TSV file locally to edit the dummy
## path to the fastQ files added by `create_eager_input.sh` to a real local
## path provided as a positional argument. Any further local tweaks to the
## TSV before running eager should be added below that in the form of bash
## commands to aid in reproducibility.
##
## Outputs:
##   - <parent dir of local_data_dir>/<input_tsv basename>.finalised.tsv
##   - an updated script_versions.txt next to the input TSV

## usage tsv_patch.sh <local_data_dir> <input_tsv>

## Without both arguments every path below would silently resolve to garbage.
if [[ $# -lt 2 ]]; then
  printf 'usage: %s <local_data_dir> <input_tsv>\n' "$(basename "${0}")" >&2
  exit 2
fi

local_data_dir="$(readlink -f "${1}")"
input_tsv="$(readlink -f "${2}")"
output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")

## Load helper functions. This script relies on `get_index_of` and
## `HELPER_FUNCTION_VERSION` being available after this line; they are not
## defined here, so they are presumably provided by source_me.sh.
helper_script="$(dirname "${2}")/../../scripts/source_me.sh"
# shellcheck source=/dev/null
if ! source "${helper_script}"; then
  printf 'ERROR: could not load helper functions from %s\n' "${helper_script}" >&2
  exit 1
fi

## Index non-proliferated columns and exclude them from the finalised TSV.
## Each wanted column is looked up in the header of the input TSV, so the
## selection follows the actual file layout rather than assuming that the
## wanted columns are the first ones in the file.
cut_selector=''
IFS=$'\t' read -r -a tsv_header < "${input_tsv}"
for col_name in "${columns_to_keep[@]}"; do
  ## NOTE(review): get_index_of is assumed to print a 0-based index when the
  ## element is found (the original code added 1 to its output); confirm in
  ## source_me.sh. Anything that is not a non-negative integer (e.g. -1 or an
  ## empty string) is treated as "column not present" and skipped.
  idx="$(get_index_of "${col_name}" "${tsv_header[@]:-}")"
  if [[ "${idx}" =~ ^[0-9]+$ ]]; then
    cut_selector+="$((10#${idx} + 1))," ## cut uses 1-based indexing
  fi
done

if [[ -z "${cut_selector}" ]]; then
  printf 'ERROR: none of the expected columns were found in the header of %s\n' "${input_tsv}" >&2
  exit 1
fi

## Remove added columns. Note that cut always emits the selected fields in
## the order they appear in the input file, regardless of selector order.
cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"

## Escape characters that are special on the replacement side of the sed
## expression below (backslash, '&' and the '|' delimiter) so that unusual
## directory names are inserted literally.
escaped_data_dir="$(printf '%s' "${local_data_dir}" | sed -e 's/[\\&|]/\\&/g')"
sed -i -e "s|<PATH_TO_DATA>|${escaped_data_dir}|g" "${output_tsv}"

## Any further commands to edit the file before finalisation should be added below as shown
# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"

## Keep track of versions
version_file="$(dirname "${input_tsv}")/script_versions.txt"
## Remove versions from older run if there
if [[ -f "${version_file}" ]]; then
  grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
else
  : > "${version_file}.new"
fi
## Then add new versions
printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
printf 'source_me.sh for final TSV:\t%s\n' "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
mv "${version_file}.new" "${version_file}"
2 changes: 2 additions & 0 deletions packages/2023_Salazar_MachuPicchu/script_versions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
create_eager_input.sh: 0.2.1dev
source_me.sh for initial TSV: 0.2.1dev

0 comments on commit 065c626

Please sign in to comment.