diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..db6f505 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +## All line endings should be linux file endings +* text eol=lf +## git lfs support for bed and pos files +*.pos filter=lfs diff=lfs merge=lfs -text +*.bed filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 8eb7182..32facfb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ eager/ *.finalised.tsv array_Logs/ array_tempfiles/ -testing_assets/ \ No newline at end of file +testing_assets/ +poseidon_packages/ +.tmp/ diff --git a/CHANGELOG.md b/CHANGELOG.md index cf9cad8..9f4bfa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,26 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v0.2.1dev - 02/11/2023 + +### `Added` + +- `scripts/minotaur_packager.sh`: Script to create poseidon half-packages and fill in the janno file from eager results. +- `scripts/populate_janno.py`: Script to fill in janno files with poseidon metadata from nf-core/eager results. + +### `Fixed` + +- `scripts/validate_downloaded_data.sh`: Added helptext. +- `scripts/run_eager.sh`: Now uses the `big_data` profile. +- Updates to templates for packages and configs. These are now defunct as they are pulled from the minotaur-recipes repo, and will be removed in the next release. +- `scripts/download_and_localise_package_files.sh`: Now distinguishes between `symlink_dir` and `package_eager_dir`. + +### `Dependencies` + +- nf-core/eager=2.4.6 + +### `Deprecated` + ## v0.2.0dev - 25/04/2023 ### `Added` diff --git a/assets/template.config b/assets/template.config index 1b03016..0ba9aec 100644 --- a/assets/template.config +++ b/assets/template.config @@ -1,6 +1,6 @@ // Keep track of config versions -config_template_version='0.2.0dev' -package_config_version='0.2.0dev' +config_template_version='0.2.2dev' +package_config_version='0.2.2dev' // This configuration file is designed to be a used with the nf-core/eager pipeline. // Instead of having to specify all other configurations for the Minotaur pipeline @@ -22,7 +22,7 @@ includeConfig '../../conf/CaptureType_profiles/1240K.config' params { // Keep track of config file versions used when processing - config_profile_description = "${config_profile_description}\nconfig_template_version: ${config_template_version}\npackage_config_version: ${package_config_version}" + config_profile_description = "${config_profile_description}, config_template_version: ${config_template_version}, package_config_version: ${package_config_version}" config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)" /* diff --git a/conf/CaptureType_profiles/1240K.config b/conf/CaptureType_profiles/1240K.config index 508c3b8..7154e93 100644 --- a/conf/CaptureType_profiles/1240K.config +++ b/conf/CaptureType_profiles/1240K.config @@ -1,12 +1,12 @@ // Keep track of config versions -capturetype_config_version='0.2.0dev' +capturetype_config_version='0.2.2dev' //TODO Backup all bed/snp files in the repo somewhere and use from central location instead of hard-coding paths. // Profile specifying capture-specific parameters for packages of the '1240K' CaptureType.
params{ // Keep track of config file versions used when processing config_profile_name = "${config_profile_name}, on 1240K sites" - config_profile_description="${config_profile_description}\n1240K.config: ${capturetype_config_version}" + config_profile_description="${config_profile_description}\n - CaptureType.1240K.config: ${capturetype_config_version}" // Qualimap bedfile for on-target coverage calculation snpcapture_bed = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed' diff --git a/conf/Minotaur.config b/conf/Minotaur.config index d11accd..df2308f 100644 --- a/conf/Minotaur.config +++ b/conf/Minotaur.config @@ -1,30 +1,32 @@ // Keep track of config versions -minotaur_config_version='0.2.0dev' +minotaur_config_version='0.2.1dev' // Default parameters for processing of data through Minotaur workflow. params{ // Keep track of config file versions used when processing config_profile_name = "Minotaur pipeline config" - config_profile_description = "Minortaur.config: ${minotaur_config_version}" + config_profile_description = "Minotaur.config: ${minotaur_config_version}" config_profile_url = 'https://github.com/poseidon-framework/poseidon-eager' + // Skip steps + skip_preseq = true // This data is published so won't be sequencing it more. // Mapping bwaalnn = 0.01 - + // BAM filtering run_bam_filtering = true // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads. bam_unmapped_type = 'fastq' // Keep unmapped reads as a separate fastq file. Preferred format for possible future pathogen screening. bam_mapping_quality_threshold = 30 // Keep MapQ 30+ (together with snpcapture_bed is needed for poseidon "coverage on target SNPs" field) // The above also means that reads that are mapped with MapQ below 30 are lost after filtering, not present in the fastq OR the filtered bam! - + // mtDNA to nuclear ratio run_mtnucratio = true mtnucratio_header = "MT" - + // Bam Trimming run_trim_bam = true - + // Double-stranded library clipping parameters bamutils_clip_double_stranded_half_udg_left = 2 // Trim 2 bp of either side for dsDNA half-UDG libraries. bamutils_clip_double_stranded_half_udg_right = 2 // Trim 2 bp of either side for dsDNA half-UDG libraries. 
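The layered configs above each append their version string to `config_profile_description`, which is how `minotaur_packager.sh` later recovers the exact config versions from `pipeline_info/pipeline_report.txt`. A rough sketch of how the layers come together for a run, assuming a hypothetical package name and local paths (the real invocation is assembled by `scripts/run_eager.sh`, which also sets the profiles shown further down in this diff):

# Sketch only: package name and paths are illustrative placeholders.
package='2024_ExampleStudy'
nextflow run nf-core/eager -r 2.4.6 \
    -profile eva,archgen,big_data,eva_local_paths \
    -c "packages/${package}/${package}.config" \
    --input "eager/${package}/${package}.finalised.tsv" \
    --outdir "eager/${package}/results"
# The per-package config layers on conf/Minotaur.config and a CaptureType profile
# (e.g. conf/CaptureType_profiles/1240K.config), so all of their version strings
# end up concatenated in config_profile_description.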
diff --git a/packages/2021_PattersonNature/2021_PattersonNature.config b/packages/2021_PattersonNature/2021_PattersonNature.config index 229a3c2..1b03016 100644 --- a/packages/2021_PattersonNature/2021_PattersonNature.config +++ b/packages/2021_PattersonNature/2021_PattersonNature.config @@ -34,6 +34,6 @@ params { https://nf-co.re/eager/2.4.6/parameters You can see the default values for parameters within poseidon-eager at: - https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Poseidon.config + https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config */ } diff --git a/packages/2021_PattersonNature/2021_PattersonNature.tsv b/packages/2021_PattersonNature/2021_PattersonNature.tsv index 0b5c617..17fe08d 100644 --- a/packages/2021_PattersonNature/2021_PattersonNature.tsv +++ b/packages/2021_PattersonNature/2021_PattersonNature.tsv @@ -253,7 +253,6 @@ I13714 I13714_ERS8208145 1 4 SE Homo sapiens (modern human) double half /I13713_ERS8208143_L1_R1.fastq.gz NA NA ERR7194848.fastq.gz NA I13711 I13711_ERS8208139 1 4 SE Homo sapiens (modern human) double half /I13711_ERS8208139_L1_R1.fastq.gz NA NA ERR7194844.fastq.gz NA I1389 I1389_ERS8208234 1 4 SE Homo sapiens (modern human) double half /I1389_ERS8208234_L1_R1.fastq.gz NA NA ERR7194939.fastq.gz NA -I1389 I1389_ERS8208233 1 4 SE Homo sapiens (modern human) double half /I1389_ERS8208233_L1_R1.fastq.gz NA NA 0 1 I14192 I14192_ERS8208274 1 4 SE Homo sapiens (modern human) double half /I14192_ERS8208274_L1_R1.fastq.gz NA NA ERR7194979.fastq.gz NA I15045 I15045_ERS8208427 1 4 SE Homo sapiens (modern human) double half /I15045_ERS8208427_L1_R1.fastq.gz NA NA ERR7195132.fastq.gz NA I14190 I14190_ERS8208270 1 4 SE Homo sapiens (modern human) double half /I14190_ERS8208270_L1_R1.fastq.gz NA NA ERR7194975.fastq.gz NA @@ -281,7 +280,6 @@ I14188 I14188_ERS8208265 1 4 SE Homo sapiens (modern human) double half /I14191_ERS8208272_L1_R1.fastq.gz NA NA ERR7194977.fastq.gz NA I13890 I13890_ERS8208219 1 4 SE Homo sapiens (modern human) double half /I13890_ERS8208219_L1_R1.fastq.gz NA NA ERR7194924.fastq.gz NA I13795 I13795_ERS8208209 1 4 SE Homo sapiens (modern human) double half /I13795_ERS8208209_L1_R1.fastq.gz NA NA ERR7194914.fastq.gz NA -I1391 I1391_ERS8208235 1 4 SE Homo sapiens (modern human) double half /I1391_ERS8208235_L1_R1.fastq.gz NA NA 0 1 I13898 I13898_ERS8208229 1 4 SE Homo sapiens (modern human) double half /I13898_ERS8208229_L1_R1.fastq.gz NA NA ERR7194934.fastq.gz NA I13899 I13899_ERS8208232 1 4 SE Homo sapiens (modern human) double half /I13899_ERS8208232_L1_R1.fastq.gz NA NA ERR7194937.fastq.gz NA I13897 I13897_ERS8208228 1 4 SE Homo sapiens (modern human) double half /I13897_ERS8208228_L1_R1.fastq.gz NA NA ERR7194933.fastq.gz NA @@ -750,7 +748,6 @@ I16612 I16612_ERS8208683 1 4 SE Homo sapiens (modern human) double half /I11154_ERS8207914_L1_R1.fastq.gz NA NA ERR7194619.fastq.gz NA I11156 I11156_ERS8207915 1 4 SE Homo sapiens (modern human) double half /I11156_ERS8207915_L1_R1.fastq.gz NA NA ERR7194620.fastq.gz NA I11156 I11156_ERS8207916 1 4 SE Homo sapiens (modern human) double half /I11156_ERS8207916_L1_R1.fastq.gz NA NA ERR7194621.fastq.gz NA -I11158 I11158_ERS8207917 1 4 SE Homo sapiens (modern human) double half /I11158_ERS8207917_L1_R1.fastq.gz NA NA 0 1 I11158 I11158_ERS8207918 1 4 SE Homo sapiens (modern human) double half /I11158_ERS8207918_L1_R1.fastq.gz NA NA ERR7194623.fastq.gz NA I12097_ss I12097_ss_ERS8207978_ss 1 4 SE Homo sapiens (modern human) single half 
/I12097_ss_ERS8207978_ss_L1_R1.fastq.gz NA NA ERR7194683.fastq.gz NA I17139_ss I17139_ss_ERS8208715_ss 1 4 SE Homo sapiens (modern human) single half /I17139_ss_ERS8208715_ss_L1_R1.fastq.gz NA NA ERR7195420.fastq.gz NA @@ -761,12 +758,10 @@ I17017 I17017_ERS8208711 1 4 SE Homo sapiens (modern human) double half /I17143_ss_ERS8208717_ss_L1_R1.fastq.gz NA NA ERR7195422.fastq.gz NA I17139_ss I17139_ss_ERS8208716_ss 1 4 SE Homo sapiens (modern human) single half /I17139_ss_ERS8208716_ss_L1_R1.fastq.gz NA NA ERR7195421.fastq.gz NA I12098 I12098_ERS8207979 1 4 SE Homo sapiens (modern human) double half /I12098_ERS8207979_L1_R1.fastq.gz NA NA ERR7194684.fastq.gz NA -I11159 I11159_ERS8207919 1 4 SE Homo sapiens (modern human) double half /I11159_ERS8207919_L1_R1.fastq.gz NA NA 0 1 I12098 I12098_ERS8207980 1 4 SE Homo sapiens (modern human) double half /I12098_ERS8207980_L1_R1.fastq.gz NA NA ERR7194685.fastq.gz NA I11152 I11152_ERS8207909 1 4 SE Homo sapiens (modern human) double half /I11152_ERS8207909_L1_R1.fastq.gz NA NA ERR7194614.fastq.gz NA I11151 I11151_ERS8207908 1 4 SE Homo sapiens (modern human) double half /I11151_ERS8207908_L1_R1.fastq.gz NA NA ERR7194613.fastq.gz NA I11159 I11159_ERS8207920 1 4 SE Homo sapiens (modern human) double half /I11159_ERS8207920_L1_R1.fastq.gz NA NA ERR7194625.fastq.gz NA -I11160 I11160_ERS8207921 1 4 SE Homo sapiens (modern human) double half /I11160_ERS8207921_L1_R1.fastq.gz NA NA 0 1 I11160 I11160_ERS8207922 1 4 SE Homo sapiens (modern human) double half /I11160_ERS8207922_L1_R1.fastq.gz NA NA ERR7194627.fastq.gz NA I11629 I11629_ERS8207923 1 4 SE Homo sapiens (modern human) double half /I11629_ERS8207923_L1_R1.fastq.gz NA NA ERR7194628.fastq.gz NA I12099 I12099_ERS8207981 1 4 SE Homo sapiens (modern human) double half /I12099_ERS8207981_L1_R1.fastq.gz NA NA ERR7194686.fastq.gz NA @@ -1353,13 +1348,9 @@ I23995 I23995_ERS8209257 1 4 SE Homo sapiens (modern human) double half /I23995_ERS8209258_L1_R1.fastq.gz NA NA ERR7195963.fastq.gz NA I23996 I23996_ERS8209259 1 4 SE Homo sapiens (modern human) double half /I23996_ERS8209259_L1_R1.fastq.gz NA NA ERR7195964.fastq.gz NA I23996 I23996_ERS8209260 1 4 SE Homo sapiens (modern human) double half /I23996_ERS8209260_L1_R1.fastq.gz NA NA ERR7195965.fastq.gz NA -I2405 I2405_ERS8209261 1 4 SE Homo sapiens (modern human) double half /I2405_ERS8209261_L1_R1.fastq.gz NA NA 0 1 I2405 I2405_ERS8209262 1 4 SE Homo sapiens (modern human) double half /I2405_ERS8209262_L1_R1.fastq.gz NA NA ERR7195967.fastq.gz NA -I2407 I2407_ERS8209263 1 4 SE Homo sapiens (modern human) double half /I2407_ERS8209263_L1_R1.fastq.gz NA NA 0 1 I2407 I2407_ERS8209264 1 4 SE Homo sapiens (modern human) double half /I2407_ERS8209264_L1_R1.fastq.gz NA NA ERR7195969.fastq.gz NA -I2413 I2413_ERS8209265 1 4 SE Homo sapiens (modern human) double half /I2413_ERS8209265_L1_R1.fastq.gz NA NA 0 1 I2413 I2413_ERS8209266 1 4 SE Homo sapiens (modern human) double half /I2413_ERS8209266_L1_R1.fastq.gz NA NA ERR7195971.fastq.gz NA -I2419 I2419_ERS8209267 1 4 SE Homo sapiens (modern human) double half /I2419_ERS8209267_L1_R1.fastq.gz NA NA 0 1 I2419 I2419_ERS8209268 1 4 SE Homo sapiens (modern human) double half /I2419_ERS8209268_L1_R1.fastq.gz NA NA ERR7195973.fastq.gz NA I24342 I24342_ERS8209269 1 4 SE Homo sapiens (modern human) double half /I24342_ERS8209269_L1_R1.fastq.gz NA NA ERR7195974.fastq.gz NA I24342 I24342_ERS8209270 1 4 SE Homo sapiens (modern human) double half /I24342_ERS8209270_L1_R1.fastq.gz NA NA ERR7195975.fastq.gz NA @@ -1367,7 +1358,6 @@ 
I24344 I24344_ERS8209271 1 4 SE Homo sapiens (modern human) double half /I24344_ERS8209272_L1_R1.fastq.gz NA NA ERR7195977.fastq.gz NA I24345 I24345_ERS8209273 1 4 SE Homo sapiens (modern human) double half /I24345_ERS8209273_L1_R1.fastq.gz NA NA ERR7195978.fastq.gz NA I24345 I24345_ERS8209274 1 4 SE Homo sapiens (modern human) double half /I24345_ERS8209274_L1_R1.fastq.gz NA NA ERR7195979.fastq.gz NA -I2435 I2435_ERS8209275 1 4 SE Homo sapiens (modern human) double half /I2435_ERS8209275_L1_R1.fastq.gz NA NA 0 1 I2435 I2435_ERS8209276 1 4 SE Homo sapiens (modern human) double half /I2435_ERS8209276_L1_R1.fastq.gz NA NA ERR7195981.fastq.gz NA I2446 I2446_ERS8209277 1 4 SE Homo sapiens (modern human) double half /I2446_ERS8209277_L1_R1.fastq.gz NA NA ERR7195982.fastq.gz NA I2446 I2446_ERS8209278 1 4 SE Homo sapiens (modern human) double half /I2446_ERS8209278_L1_R1.fastq.gz NA NA ERR7195983.fastq.gz NA @@ -1419,19 +1409,15 @@ I25525 I25525_ERS8209323 1 4 SE Homo sapiens (modern human) double half /I25525_ERS8209324_L1_R1.fastq.gz NA NA ERR7196029.fastq.gz NA I2565 I2565_ERS8209325 1 4 SE Homo sapiens (modern human) double half /I2565_ERS8209325_L1_R1.fastq.gz NA NA ERR7196030.fastq.gz NA I2565 I2565_ERS8209326 1 4 SE Homo sapiens (modern human) double half /I2565_ERS8209326_L1_R1.fastq.gz NA NA ERR7196031.fastq.gz NA -I2569 I2569_ERS8209327 1 4 SE Homo sapiens (modern human) double half /I2569_ERS8209327_L1_R1.fastq.gz NA NA 0 1 I2569 I2569_ERS8209328 1 4 SE Homo sapiens (modern human) double half /I2569_ERS8209328_L1_R1.fastq.gz NA NA ERR7196033.fastq.gz NA I2574 I2574_ERS8209329 1 4 SE Homo sapiens (modern human) double half /I2574_ERS8209329_L1_R1.fastq.gz NA NA ERR7196034.fastq.gz NA I2574 I2574_ERS8209330 1 4 SE Homo sapiens (modern human) double half /I2574_ERS8209330_L1_R1.fastq.gz NA NA ERR7196035.fastq.gz NA -I2575 I2575_ERS8209331 1 4 SE Homo sapiens (modern human) double half /I2575_ERS8209331_L1_R1.fastq.gz NA NA 0 1 I2575 I2575_ERS8209332 1 4 SE Homo sapiens (modern human) double half /I2575_ERS8209332_L1_R1.fastq.gz NA NA ERR7196037.fastq.gz NA I2598 I2598_ERS8209333 1 4 SE Homo sapiens (modern human) double half /I2598_ERS8209333_L1_R1.fastq.gz NA NA ERR7196038.fastq.gz NA I2598 I2598_ERS8209334 1 4 SE Homo sapiens (modern human) double half /I2598_ERS8209334_L1_R1.fastq.gz NA NA ERR7196039.fastq.gz NA I2611 I2611_ERS8209335 1 4 SE Homo sapiens (modern human) double half /I2611_ERS8209335_L1_R1.fastq.gz NA NA ERR7196040.fastq.gz NA I2611 I2611_ERS8209336 1 4 SE Homo sapiens (modern human) double half /I2611_ERS8209336_L1_R1.fastq.gz NA NA ERR7196041.fastq.gz NA -I2629 I2629_ERS8209337 1 4 SE Homo sapiens (modern human) double half /I2629_ERS8209337_L1_R1.fastq.gz NA NA 0 1 I2629 I2629_ERS8209338 1 4 SE Homo sapiens (modern human) double half /I2629_ERS8209338_L1_R1.fastq.gz NA NA ERR7196043.fastq.gz NA -I2656 I2656_ERS8209339 1 4 SE Homo sapiens (modern human) double half /I2656_ERS8209339_L1_R1.fastq.gz NA NA 0 1 I2656 I2656_ERS8209340 1 4 SE Homo sapiens (modern human) double half /I2656_ERS8209340_L1_R1.fastq.gz NA NA ERR7196045.fastq.gz NA I2658 I2658_ERS8209341 1 4 SE Homo sapiens (modern human) double half /I2658_ERS8209341_L1_R1.fastq.gz NA NA ERR7196046.fastq.gz NA I2658 I2658_ERS8209342 1 4 SE Homo sapiens (modern human) double half /I2658_ERS8209342_L1_R1.fastq.gz NA NA ERR7196047.fastq.gz NA @@ -1477,7 +1463,6 @@ I2695 I2695_ERS8209381 1 4 SE Homo sapiens (modern human) double half /I2695_ERS8209382_L1_R1.fastq.gz NA NA ERR7196087.fastq.gz NA I2696 I2696_ERS8209383 
1 4 SE Homo sapiens (modern human) double half /I2696_ERS8209383_L1_R1.fastq.gz NA NA ERR7196088.fastq.gz NA I2696 I2696_ERS8209384 1 4 SE Homo sapiens (modern human) double half /I2696_ERS8209384_L1_R1.fastq.gz NA NA ERR7196089.fastq.gz NA -I2699 I2699_ERS8209385 1 4 SE Homo sapiens (modern human) double half /I2699_ERS8209385_L1_R1.fastq.gz NA NA 0 1 I2699 I2699_ERS8209386 1 4 SE Homo sapiens (modern human) double half /I2699_ERS8209386_L1_R1.fastq.gz NA NA ERR7196091.fastq.gz NA I27379 I27379_ERS8209387 1 4 SE Homo sapiens (modern human) double half /I27379_ERS8209387_L1_R1.fastq.gz NA NA ERR7196092.fastq.gz NA I27379 I27379_ERS8209388 1 4 SE Homo sapiens (modern human) double half /I27379_ERS8209388_L1_R1.fastq.gz NA NA ERR7196093.fastq.gz NA @@ -1493,11 +1478,9 @@ I27384 I27384_ERS8209397 1 4 SE Homo sapiens (modern human) double half /I27384_ERS8209398_L1_R1.fastq.gz NA NA ERR7196103.fastq.gz NA I27385 I27385_ERS8209399 1 4 SE Homo sapiens (modern human) double half /I27385_ERS8209399_L1_R1.fastq.gz NA NA ERR7196104.fastq.gz NA I27385 I27385_ERS8209400 1 4 SE Homo sapiens (modern human) double half /I27385_ERS8209400_L1_R1.fastq.gz NA NA ERR7196105.fastq.gz NA -I2796 I2796_ERS8209401 1 4 SE Homo sapiens (modern human) double half /I2796_ERS8209401_L1_R1.fastq.gz NA NA 0 1 I2796 I2796_ERS8209402 1 4 SE Homo sapiens (modern human) double half /I2796_ERS8209402_L1_R1.fastq.gz NA NA ERR7196107.fastq.gz NA I2799 I2799_ERS8209403 1 4 SE Homo sapiens (modern human) double half /I2799_ERS8209403_L1_R1.fastq.gz NA NA ERR7196108.fastq.gz NA I2799 I2799_ERS8209404 1 4 SE Homo sapiens (modern human) double half /I2799_ERS8209404_L1_R1.fastq.gz NA NA ERR7196109.fastq.gz NA -I2824 I2824_ERS8209405 1 4 SE Homo sapiens (modern human) double half /I2824_ERS8209405_L1_R1.fastq.gz NA NA 0 1 I2824 I2824_ERS8209406 1 4 SE Homo sapiens (modern human) double half /I2824_ERS8209406_L1_R1.fastq.gz NA NA ERR7196111.fastq.gz NA I2982 I2982_ERS8209407 1 4 SE Homo sapiens (modern human) double half /I2982_ERS8209407_L1_R1.fastq.gz NA NA ERR7196112.fastq.gz NA I2982 I2982_ERS8209408 1 4 SE Homo sapiens (modern human) double half /I2982_ERS8209408_L1_R1.fastq.gz NA NA ERR7196113.fastq.gz NA @@ -1511,7 +1494,6 @@ I3035 I3035_ERS8209415 1 4 SE Homo sapiens (modern human) double half /I3035_ERS8209416_L1_R1.fastq.gz NA NA ERR7196121.fastq.gz NA I3083 I3083_ERS8209417 1 4 SE Homo sapiens (modern human) double full /I3083_ERS8209417_L1_R1.fastq.gz NA NA ERR7196122.fastq.gz NA I3083 I3083_ERS8209418 1 4 SE Homo sapiens (modern human) double full /I3083_ERS8209418_L1_R1.fastq.gz NA NA ERR7196123.fastq.gz NA -I3137 I3137_ERS8209419 1 4 SE Homo sapiens (modern human) double half /I3137_ERS8209419_L1_R1.fastq.gz NA NA 0 1 I3137 I3137_ERS8209420 1 4 SE Homo sapiens (modern human) double half /I3137_ERS8209420_L1_R1.fastq.gz NA NA ERR7196125.fastq.gz NA I3139 I3139_ERS8209421 1 4 SE Homo sapiens (modern human) double half /I3139_ERS8209421_L1_R1.fastq.gz NA NA ERR7196126.fastq.gz NA I3139 I3139_ERS8209422 1 4 SE Homo sapiens (modern human) double half /I3139_ERS8209422_L1_R1.fastq.gz NA NA ERR7196127.fastq.gz NA @@ -1529,7 +1511,6 @@ I3567 I3567_ERS8209433 1 4 SE Homo sapiens (modern human) double half /I3567_ERS8209434_L1_R1.fastq.gz NA NA ERR7196139.fastq.gz NA I3568 I3568_ERS8209435 1 4 SE Homo sapiens (modern human) double half /I3568_ERS8209435_L1_R1.fastq.gz NA NA ERR7196140.fastq.gz NA I3568 I3568_ERS8209436 1 4 SE Homo sapiens (modern human) double half /I3568_ERS8209436_L1_R1.fastq.gz NA NA ERR7196141.fastq.gz NA 
-I4067 I4067_ERS8209437 1 4 SE Homo sapiens (modern human) double half /I4067_ERS8209437_L1_R1.fastq.gz NA NA 0 1 I4067 I4067_ERS8209438 1 4 SE Homo sapiens (modern human) double half /I4067_ERS8209438_L1_R1.fastq.gz NA NA ERR7196143.fastq.gz NA I4188 I4188_ERS8209439 1 4 SE Homo sapiens (modern human) double half /I4188_ERS8209439_L1_R1.fastq.gz NA NA ERR7196144.fastq.gz NA I4188 I4188_ERS8209440 1 4 SE Homo sapiens (modern human) double half /I4188_ERS8209440_L1_R1.fastq.gz NA NA ERR7196145.fastq.gz NA @@ -1547,9 +1528,7 @@ I5365 I5365_ERS8209451 1 4 SE Homo sapiens (modern human) double half /I5365_ERS8209452_L1_R1.fastq.gz NA NA ERR7196157.fastq.gz NA I5440 I5440_ERS8209453 1 4 SE Homo sapiens (modern human) double half /I5440_ERS8209453_L1_R1.fastq.gz NA NA ERR7196158.fastq.gz NA I5440 I5440_ERS8209454 1 4 SE Homo sapiens (modern human) double half /I5440_ERS8209454_L1_R1.fastq.gz NA NA ERR7196159.fastq.gz NA -I5471 I5471_ERS8209455 1 4 SE Homo sapiens (modern human) double half /I5471_ERS8209455_L1_R1.fastq.gz NA NA 0 1 I5471 I5471_ERS8209456 1 4 SE Homo sapiens (modern human) double half /I5471_ERS8209456_L1_R1.fastq.gz NA NA ERR7196161.fastq.gz NA -I5474 I5474_ERS8209457 1 4 SE Homo sapiens (modern human) double half /I5474_ERS8209457_L1_R1.fastq.gz NA NA 0 1 I5474 I5474_ERS8209458 1 4 SE Homo sapiens (modern human) double half /I5474_ERS8209458_L1_R1.fastq.gz NA NA ERR7196163.fastq.gz NA I5502 I5502_ERS8209459 1 4 SE Homo sapiens (modern human) double half /I5502_ERS8209459_L1_R1.fastq.gz NA NA ERR7196164.fastq.gz NA I5502 I5502_ERS8209460 1 4 SE Homo sapiens (modern human) double half /I5502_ERS8209460_L1_R1.fastq.gz NA NA ERR7196165.fastq.gz NA @@ -1601,23 +1580,14 @@ I7629 I7629_ERS8209505 1 4 SE Homo sapiens (modern human) double half /I7629_ERS8209506_L1_R1.fastq.gz NA NA ERR7196211.fastq.gz NA I7632 I7632_ERS8209507 1 4 SE Homo sapiens (modern human) double half /I7632_ERS8209507_L1_R1.fastq.gz NA NA ERR7196212.fastq.gz NA I7632 I7632_ERS8209508 1 4 SE Homo sapiens (modern human) double half /I7632_ERS8209508_L1_R1.fastq.gz NA NA ERR7196213.fastq.gz NA -I7950 I7950_ERS8209509 1 4 SE Homo sapiens (modern human) double half /I7950_ERS8209509_L1_R1.fastq.gz NA NA 0 1 I7950 I7950_ERS8209510 1 4 SE Homo sapiens (modern human) double half /I7950_ERS8209510_L1_R1.fastq.gz NA NA ERR7196215.fastq.gz NA -I7951 I7951_ERS8209511 1 4 SE Homo sapiens (modern human) double half /I7951_ERS8209511_L1_R1.fastq.gz NA NA 0 1 I7951 I7951_ERS8209512 1 4 SE Homo sapiens (modern human) double half /I7951_ERS8209512_L1_R1.fastq.gz NA NA ERR7196217.fastq.gz NA -I7952 I7952_ERS8209513 1 4 SE Homo sapiens (modern human) double half /I7952_ERS8209513_L1_R1.fastq.gz NA NA 0 1 I7952 I7952_ERS8209514 1 4 SE Homo sapiens (modern human) double half /I7952_ERS8209514_L1_R1.fastq.gz NA NA ERR7196219.fastq.gz NA -I7953 I7953_ERS8209515 1 4 SE Homo sapiens (modern human) double half /I7953_ERS8209515_L1_R1.fastq.gz NA NA 0 1 I7953 I7953_ERS8209516 1 4 SE Homo sapiens (modern human) double half /I7953_ERS8209516_L1_R1.fastq.gz NA NA ERR7196221.fastq.gz NA -I7958 I7958_ERS8209517 1 4 SE Homo sapiens (modern human) double half /I7958_ERS8209517_L1_R1.fastq.gz NA NA 0 1 I7958 I7958_ERS8209518 1 4 SE Homo sapiens (modern human) double half /I7958_ERS8209518_L1_R1.fastq.gz NA NA ERR7196223.fastq.gz NA -I7959 I7959_ERS8209519 1 4 SE Homo sapiens (modern human) double half /I7959_ERS8209519_L1_R1.fastq.gz NA NA 0 1 I7959 I7959_ERS8209520 1 4 SE Homo sapiens (modern human) double half /I7959_ERS8209520_L1_R1.fastq.gz NA 
NA ERR7196225.fastq.gz NA -I7960 I7960_ERS8209521 1 4 SE Homo sapiens (modern human) double half /I7960_ERS8209521_L1_R1.fastq.gz NA NA 0 1 I7960 I7960_ERS8209522 1 4 SE Homo sapiens (modern human) double half /I7960_ERS8209522_L1_R1.fastq.gz NA NA ERR7196227.fastq.gz NA -I7962 I7962_ERS8209523 1 4 SE Homo sapiens (modern human) double half /I7962_ERS8209523_L1_R1.fastq.gz NA NA 0 1 I7962 I7962_ERS8209524 1 4 SE Homo sapiens (modern human) double half /I7962_ERS8209524_L1_R1.fastq.gz NA NA ERR7196229.fastq.gz NA -I7963 I7963_ERS8209525 1 4 SE Homo sapiens (modern human) double half /I7963_ERS8209525_L1_R1.fastq.gz NA NA 0 1 I7963 I7963_ERS8209526 1 4 SE Homo sapiens (modern human) double half /I7963_ERS8209526_L1_R1.fastq.gz NA NA ERR7196231.fastq.gz NA I7964 I7964_ERS8209527 1 4 SE Homo sapiens (modern human) double half /I7964_ERS8209527_L1_R1.fastq.gz NA NA ERR7196232.fastq.gz NA I7964 I7964_ERS8209528 1 4 SE Homo sapiens (modern human) double half /I7964_ERS8209528_L1_R1.fastq.gz NA NA ERR7196233.fastq.gz NA @@ -1637,17 +1607,11 @@ I0525 I0525_ERS8207822 1 4 SE Homo sapiens (modern human) double half /I0525_ERS8207823_L1_R1.fastq.gz NA NA ERR7194575.fastq.gz NA I0527 I0527_ERS8207824 1 4 SE Homo sapiens (modern human) double half /I0527_ERS8207824_L1_R1.fastq.gz NA NA ERR7194576.fastq.gz NA I0527 I0527_ERS8207825 1 4 SE Homo sapiens (modern human) double half /I0527_ERS8207825_L1_R1.fastq.gz NA NA ERR7194577.fastq.gz NA -I10342 I10342_ERS8207826 1 4 SE Homo sapiens (modern human) double half /I10342_ERS8207826_L1_R1.fastq.gz NA NA 0 1 I10342 I10342_ERS8207827 1 4 SE Homo sapiens (modern human) double half /I10342_ERS8207827_L1_R1.fastq.gz NA NA ERR7194579.fastq.gz NA -I10343 I10343_ERS8207828 1 4 SE Homo sapiens (modern human) double half /I10343_ERS8207828_L1_R1.fastq.gz NA NA 0 1 I10343 I10343_ERS8207829 1 4 SE Homo sapiens (modern human) double half /I10343_ERS8207829_L1_R1.fastq.gz NA NA ERR7194581.fastq.gz NA -I10344 I10344_ERS8207830 1 4 SE Homo sapiens (modern human) double half /I10344_ERS8207830_L1_R1.fastq.gz NA NA 0 1 I10344 I10344_ERS8207831 1 4 SE Homo sapiens (modern human) double half /I10344_ERS8207831_L1_R1.fastq.gz NA NA ERR7194583.fastq.gz NA -I10345 I10345_ERS8207832 1 4 SE Homo sapiens (modern human) double half /I10345_ERS8207832_L1_R1.fastq.gz NA NA 0 1 I10345 I10345_ERS8207833 1 4 SE Homo sapiens (modern human) double half /I10345_ERS8207833_L1_R1.fastq.gz NA NA ERR7194585.fastq.gz NA -I10347 I10347_ERS8207834 1 4 SE Homo sapiens (modern human) double half /I10347_ERS8207834_L1_R1.fastq.gz NA NA 0 1 I10347 I10347_ERS8207835 1 4 SE Homo sapiens (modern human) double half /I10347_ERS8207835_L1_R1.fastq.gz NA NA ERR7194587.fastq.gz NA -I10348 I10348_ERS8207836 1 4 SE Homo sapiens (modern human) double half /I10348_ERS8207836_L1_R1.fastq.gz NA NA 0 1 I10348 I10348_ERS8207837 1 4 SE Homo sapiens (modern human) double half /I10348_ERS8207837_L1_R1.fastq.gz NA NA ERR7194589.fastq.gz NA I11033 I11033_ERS8207838 1 4 SE Homo sapiens (modern human) double half /I11033_ERS8207838_L1_R1.fastq.gz NA NA ERR7194590.fastq.gz NA I11033 I11033_ERS8207839 1 4 SE Homo sapiens (modern human) double half /I11033_ERS8207839_L1_R1.fastq.gz NA NA ERR7194591.fastq.gz NA diff --git a/packages/2021_PattersonNature/script_versions.txt b/packages/2021_PattersonNature/script_versions.txt index fc73d50..e2cf8ab 100644 --- a/packages/2021_PattersonNature/script_versions.txt +++ b/packages/2021_PattersonNature/script_versions.txt @@ -1,2 +1,7 @@ -create_eager_input.sh: 0.2.0dev 
+create_eager_input.sh: 0.2.1dev source_me.sh for initial TSV: 0.2.0dev +download_ena_data.py: 0.2.0dev +validate_downloaded_data.sh: 0.2.1dev +source_me.sh for data validation: 0.2.0dev +2021_PattersonNature.tsv_patch.sh: 0.2.0dev +source_me.sh for final TSV: 0.2.0dev diff --git a/scripts/download_and_localise_package_files.sh b/scripts/download_and_localise_package_files.sh index fee662f..b63d3bf 100755 --- a/scripts/download_and_localise_package_files.sh +++ b/scripts/download_and_localise_package_files.sh @@ -46,7 +46,8 @@ ssf_file="${package_dir}/${package_name}.ssf" download_dir="${repo_dir}/raw_sequencing_data/" download_log_dir="${download_dir}/download_logs" local_data_dir="${repo_dir}/raw_sequencing_data/${package_name}" -symlink_dir="${repo_dir}/eager/${package_name}/data" +package_eager_dir="${repo_dir}/eager/${package_name}/" +symlink_dir="${package_eager_dir}/data" tsv_patch_fn="${package_dir}/${package_name}.tsv_patch.sh" original_tsv="${package_dir}/${package_name}.tsv" @@ -59,7 +60,7 @@ check_fail $? "${script_debug_string} Downloads did not finish completely. Try a ## STEP 2: Validate downloaded files. mkdir -p ${symlink_dir} -${repo_dir}/scripts/validate_downloaded_data.sh ${ssf_file} ${local_data_dir} ${symlink_dir} +${repo_dir}/scripts/validate_downloaded_data.sh ${ssf_file} ${local_data_dir} ${package_eager_dir} check_fail $? "${script_debug_string} Validation and symlink creation failed." ## STEP 3: Localise TSV file. diff --git a/scripts/minotaur_packager.sh b/scripts/minotaur_packager.sh new file mode 100755 index 0000000..cc26c49 --- /dev/null +++ b/scripts/minotaur_packager.sh @@ -0,0 +1,305 @@ +#!/usr/bin/env bash +VERSION='0.2.1dev' +set -o pipefail ## Fail a pipeline if any command within it fails. +# set -x ## Debugging + +## Helptext function +function Helptext() { + echo -ne "\t usage: ${0} [options] Package_Minotaur_Directory\n\n" + echo -ne "This script collects the genotype data and metadata from Minotaur-processing and creates/updates the requested poseidon package if needed.\n\n" + echo -ne "Options:\n" + echo -ne "-d, --debug\t\tActivates debug mode, and keeps temporary directories for troubleshooting.\n" + echo -ne "-h, --help\t\tPrint this text and exit.\n" + echo -ne "-v, --version\t\tPrint version and exit.\n" +} + +## Source helper functions +repo_dir=$(dirname $(readlink -f ${0}))/.. ## The repository will be the original position of this script. If a user copies instead of symlink, this will fail. +source ${repo_dir}/scripts/source_me.sh ## Source helper functions + +## A function to make a genotype dataset for the output package out of an array of genotype files. +## Usage: make_genotype_dataset_out_of_genotypes <format> <out_name> <tempdir> <geno_fn1> [<geno_fn2> ...] +## format: The format of the genotype files. Either 'PLINK' or 'EIGENSTRAT'. +## out_name: The name of the output genotype dataset. +## tempdir: The temporary directory to use for mixing the genotypes. +## geno_fn*: The genotype files to merge together. +## NOTE: This function uses the errecho() function defined in source_me.sh +function make_genotype_dataset_out_of_genotypes() { + local format + local tempdir + local out_name + local input_fns + local input_fn + local base_fn + local ind_fns + local geno_top_length + local geno_bot_length + + format=${1} + out_name=${2} + tempdir=${3} + shift 3 + input_fns=("${@}") + + ## Check that the format is valid. + if [[ ${format} != "PLINK" && ${format} != "EIGENSTRAT" ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: Invalid format '${format}' provided."
+ exit 1 + fi + + ## Ensure the provided temp dir exists + if [[ ! -d ${tempdir} ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: Temporary directory '${tempdir}' not found." + exit 1 + fi + + ## Check that the genotype files exist. + for input_fn in ${input_fns[@]}; do + if [[ ! -f ${input_fn} ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: Genotype file '${input_fn}' not found." + exit 1 + fi + done + + ## Merge eigenstrat genotypes + if [[ ${format} == "EIGENSTRAT" ]]; then + ## Paste genos together. If only one is there, then it's simply a copy of it + paste -d '\0' ${input_fns[@]} > ${tempdir}/${out_name}.geno + + ## Copy the snp file + cp ${input_fns[0]%.geno}.snp ${tempdir}/${out_name}.snp + + ## And cat the ind files (this needs a bit of variable expansion to work) + ind_fns='' + for base_fn in ${input_fns[@]%.geno}; do + ind_fns+="${base_fn}.ind " + done + ## Also add '_MNT' suffix to individual IDs. Set input and output field sep to TAB. + cat ${ind_fns} | awk 'BEGIN {OFS=FS="\t"}; {$1=$1"_MNT"; print $0}' > ${tempdir}/${out_name}.ind + + ## Final sanity check, that the file dimensions are correct. + if [[ $(wc -l ${tempdir}/${out_name}.geno | cut -f 1 -d ' ') != $(wc -l ${tempdir}/${out_name}.snp | cut -f 1 -d ' ') ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: Genotype file '${out_name}.geno' has a different number of lines than the snp file." + exit 1 + fi + + ## Check that the genotype dataset has a consistent length across first and last snp. + geno_top_length=$(bc <<< "$(head -n1 ${tempdir}/${out_name}.geno | wc -c | cut -f 1 -d ' ') - 1") + geno_bot_length=$(bc <<< "$(tail -n1 ${tempdir}/${out_name}.geno | wc -c | cut -f 1 -d ' ') - 1") + if [[ ${geno_top_length} != ${geno_bot_length} ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: Genotype file '${out_name}.geno' has inconsistent line lengths. Check the input datasets and try again." + exit 1 + fi + + ## The number of characters (excluding the new line character) in the last row of the genotype file should match the number of individuals. + if [[ ${geno_bot_length} != $(wc -l ${tempdir}/${out_name}.ind | cut -f 1 -d ' ') ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: Genotype file '${out_name}.geno' has a different number of individuals (columns) than the ind file." + exit 1 + fi + errecho "[make_genotype_dataset_out_of_genotypes()]: Successfully created genotype dataset '${out_name}.{geno,snp,ind}'." + + ## Merge plink genotypes + elif [[ ${format} == 'PLINK' ]]; then + errecho -r "[make_genotype_dataset_out_of_genotypes()]: PLINK genotype merging not yet implemented." + exit 1 + fi +} + +## Helper function to pull minotaur versions from Config Profile Description and add them to a file.
+## Usage: add_versions_file <package_eager_result_dir> <version_fn> +## Will create a file with the following information: +## nf-core/eager version +## Minotaur config version +## CaptureType config version +## Package config version +## Minotaur-packager version +function add_versions_file() { + local package_eager_result_dir + local capture_type_config + local version_fn + local minotaur_version + local eager_version + local minotaur_versioning_string + local config_version + local capture_type_version + local capture_type_version_string + local pipeline_report_fn + + ## Read in function params + package_eager_result_dir=${1} + version_fn=${2} + + pipeline_report_fn=${package_eager_result_dir}/pipeline_info/pipeline_report.txt ## The pipeline report file from nf-core/eager + eager_version=$(grep "Pipeline Release:" ${pipeline_report_fn} | awk -F ":" '{print $NF}') + + ## Each attribute now comes in its own line. (0.2.0dev +) + minotaur_version=$(grep "Minortaur.config" ${pipeline_report_fn} | awk -F ' ' '{print $NF}') + ## TODO-dev un-hard-code 1240K config (future packages will have the keyword "CaptureType.XXXX.config") + capture_type_version_string=$(grep "1240K.config" ${pipeline_report_fn}) + ## If the grep above returned nothing, then there is no Capture Type profile. + if [[ -z ${capture_type_version_string} ]]; then + capture_type_version='' + capture_type_config='' + else + capture_type_version=$(echo ${capture_type_version_string} | awk -F ' ' '{print $NF}') + capture_type_config="1240K" ## TODO-dev un-hard-code 1240K config (future packages will have the keyword "CaptureType.XXXX.config") + fi + + config_version=$(grep "config_template_version" ${pipeline_report_fn} | awk -F ' ' '{print $NF}') + package_config_version=$(grep "package_config_version" ${pipeline_report_fn} | awk -F ' ' '{print $NF}') + + ## Create the versions file. Flush any old file contents if the file exists. + echo "nf-core/eager version: ${eager_version}" > ${version_fn} + echo "Minotaur config version: ${minotaur_version}" >> ${version_fn} + if [[ ! -z ${capture_type_version_string} ]]; then + echo "CaptureType profile: ${capture_type_config} ${capture_type_version}" >> ${version_fn} + fi + echo "Config template version: ${config_version}" >> ${version_fn} + echo "Package config version: ${package_config_version}" >> ${version_fn} + echo "Minotaur-packager version: ${VERSION}" >> ${version_fn} +} + +## Parse CLI args. +TEMP=`getopt -q -o dhv --long debug,help,version -n "${0}" -- "$@"` +eval set -- "${TEMP}" + +## Parameter defaults +## The package_minotaur_directory is the directory where the minotaur output is stored. +## It should look something like this: +## /path/to/minotaur/outputs/2021_my_package/ +## ├── 2021_my_package.finalised.tsv ## The finalised tsv file for processing with nf-core/eager +## ├── data/ ## The symlinks to the raw data used as eager input +## ├── results/ ## nf-core/eager results directory +## └── work/ ## Nextflow work directory +package_minotaur_directory='' + +## Print helptext and exit when no positional argument is provided.
+if [[ "${#@}" == "1" ]]; then + Helptext + exit 0 +fi + +## Read in CLI arguments +while true ; do + case "$1" in + -h|--help) Helptext; exit 0 ;; + -v|--version) echo ${VERSION}; exit 0;; + -d|--debug) errecho -y "[minotaur_packager.sh]: Debug mode activated."; debug_mode=1; shift ;; + --) package_minotaur_directory="${2%/}"; break ;; + *) echo -e "invalid option provided.\n"; Helptext; exit 1;; + esac +done + +## Infer other variables from the package_minotaur_directory provided. +package_name="${package_minotaur_directory##*/}" +package_oven_dir="/mnt/archgen/poseidon/minotaur/minotaur-package-oven/" ## Hard-coded path for EVA +output_package_dir="${package_oven_dir}/${package_name}" ## Hard-coded path for EVA +finalisedtsv_fn="${package_minotaur_directory}/${package_name}.finalised.tsv" +root_results_dir="${package_minotaur_directory}/results" + +## Get current date for versioning +errecho -y "[minotaur_packager.sh]: version ${VERSION}" +date_stamp=$(date +'%Y-%M-%d') + +## Check that the finalised tsv file exists. +if [[ ! -f ${finalisedtsv_fn} ]]; then + errecho -r "[${package_name}]: Finalised tsv file not found in '${package_minotaur_directory}'." + errecho -r "[${package_name}]: Please check the provided directory and try again." + exit 1 +fi + +## Quick sanity check that the minotaur run has completed (i.e. genotypes are not newer than multiQC). +newest_genotype_fn=$(ls -Art -1 ${root_results_dir}/genotyping/*geno | tail -n 1) ## Reverse order and tail to avoid broken pipe errors +if [[ -z newest_genotype_fn && -f ${newest_genotype_fn} && ${root_results_dir}/multiqc/multiqc_report.html && ${newest_genotype_fn} -nt ${root_results_dir}/multiqc/multiqc_report.html ]]; then + errecho -r "Minotaur run has not completed. Please ensure that processing has finished." + exit 1 +fi + +## Create a temporary directory to mix and rename the genotype datasets in. +## 'tmp_dir' outside function, 'tempdir' in make_genotype_dataset_out_of_genotypes function +tmp_dir=$(mktemp -d ${package_oven_dir}/.tmp/MNT_${package_name}.XXXXXXXXXX) +check_fail $? "[${package_name}]: Failed to create temporary directory. Aborting.\nCheck your permissions in ${package_oven_dir}, and that directory ${package_oven_dir}/.tmp/ exists." + +genotype_fns=($(ls -1 ${root_results_dir}/genotyping/*geno)) ## List of genotype files. + +## Infer the SNP set from the config activated in the minotaur run from the config description. +## TODO-dev Currently hard-coded to 1240K since all data thus far is 1240K. This should grep "CaptureType" from the pipeline report, and infer the SNP set from that. +# snp_set=$( grep 'Config Profile Description' ${root_results_dir}/pipeline_info/pipeline_report.txt | cut -f 2 -d ',' | cut -d ':' -f 1 | tr -d ' ' | cut -d '.' -f 1 ) +snp_set="1240K" +errecho -y "[${package_name}]: SNP set inferred as '${snp_set}'." + +## Check that the inferred snp set is supported. Should trigger if the inference somehow breaks. +supported_snpsets=($(ls -1 ${repo_dir}/conf/CaptureType_profiles/ | cut -d "." -f 1)) +if [[ ! -z $(all_x_in_y 1 ${snp_set} ${#supported_snpsets[@]} ${supported_snpsets[@]}) ]]; then + errecho -r "[${package_name}]: Inferred SNP set '${snp_set}' is not supported. SNP set inference might have gone wrong." + exit 1 +fi + +## If the package exists and the genotypes are not newer than the package, then print a message and do nothing. +if [[ -d ${output_package_dir} ]] && [[ ! 
${newest_genotype_fn} -nt ${output_package_dir}/${package_name}.geno ]]; then + errecho -y "[${package_name}]: Package is up to date." + exit 0 + +## If genotypes are new or the package does not exist, then create/update the package genotypes. +elif [[ ! -d ${output_package_dir} ]] || [[ ${newest_genotype_fn} -nt ${output_package_dir}/${package_name}.geno ]]; then + errecho -y "[${package_name}]: Genotypes are new or package does not exist. Creating/Updating package genotypes." + make_genotype_dataset_out_of_genotypes "EIGENSTRAT" "${package_name}" "${tmp_dir}" ${genotype_fns[@]} + + ## Create a new package with the given genotypes. + trident init -p ${tmp_dir}/${package_name}.geno -o ${tmp_dir}/package/ -n ${package_name} --snpSet ${snp_set} + check_fail $? "[${package_name}]: Failed to initialise package. Aborting." + + ## Fill in janno + errecho -y "Populating janno file" + ${repo_dir}/scripts/populate_janno.py -r ${package_minotaur_directory}/results/ -t ${finalisedtsv_fn} -p ${tmp_dir}/package/POSEIDON.yml + + ## TODO-dev Infer genetic sex from janno and mirror to ind file. + + ## Convert data to PLINK format + trident genoconvert -d ${tmp_dir}/package \ + --genoFile ${tmp_dir}/package/${package_name}.geno \ + --snpFile ${tmp_dir}/package/${package_name}.snp \ + --indFile ${tmp_dir}/package/${package_name}.ind \ + --inFormat EIGENSTRAT \ + --outFormat PLINK \ + --removeOld + + ## Update the package yaml to account for the changes in the janno (update renamed to rectify) + trident rectify -d ${tmp_dir}/package \ + --packageVersion Patch \ + --logText "Automatic update of janno file from Minotaur processing." \ + --checksumAll + ## TODO Add poseidon core team, or Minotaur as contributors? + + ## Validate the resulting package + trident validate -d ${tmp_dir}/package + + ## Only move package dir to "package oven" if validation passed + if [[ $? == 0 ]] && [[ ${debug_mode} -ne 1 ]]; then + errecho -y "## Moving '${package_name}' to package oven ##" + ## Create directory for poseidon package in package oven + ## Only created now to not trip up the script if execution did not run through fully. + mkdir -p $(dirname ${output_package_dir}) + + ## Add Minotaur versioning file to package + add_versions_file ${root_results_dir} ${tmp_dir}/package/versions.txt + + ## Move package contents to the oven + mv ${tmp_dir}/package/ ${output_package_dir} + + ## Then remove remaining temp files + errecho -y "## Removing temp directory ##" + + ## Paranoid of removing in root, so extra check for tmp_dir + if [[ ! -z ${tmp_dir} ]]; then + ## Playing it safe by avoiding rm -r + rm ${tmp_dir}/* + rmdir ${tmp_dir} + fi + fi + + ## Partially fill empty fields in janno. +# elif [[ 1 ]]; then + ## TODO Add package updating once that is needed. For now, assume each package will be made once and that's it.
+fi \ No newline at end of file diff --git a/scripts/populate_janno.py b/scripts/populate_janno.py new file mode 100755 index 0000000..90c5164 --- /dev/null +++ b/scripts/populate_janno.py @@ -0,0 +1,507 @@ +#!/usr/bin/env python3 + +import pyEager +import argparse +import os +import glob +import pandas as pd +import yaml +import re +import numpy as np +from collections import namedtuple + +VERSION = "0.2.0dev" + + +def camel_to_snake(name): + name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() + + +def infer_library_name(row, prefix_col=None, target_col=None): + strip_me = "{}_".format(row[prefix_col]) + from_me = row[target_col] + + ## First, remove the added prefix + if from_me.startswith(strip_me): + inferred_name = from_me.replace(strip_me, "", 1) + else: + inferred_name = from_me + + ## Finally, strip the hard-coded suffix if there. + if inferred_name.endswith("_ss"): + inferred_name = inferred_name.replace("_ss", "", 1) + else: + inferred_name = inferred_name + + return inferred_name + +def set_contamination_measure(row): + ## If the row's contamination is not NaN, then set to "ANGSD", otherwise NaN + if not pd.isna(row["Contamination"]): + return "ANGSD[v0.935]" ## TODO-dev infer the version from eager software_versions.txt + else: + return np.nan + +## Function that takes a pd.DataFrame and the name of a column, and applies the format to it to add a new column named poseidon_id +class PoseidonYaml: + def __init__(self, path_poseidon_yml): + ## Check that path_poseidon_yml exists. Throw error if not. + if not os.path.exists(path_poseidon_yml): + raise ValueError( + "The path to the poseidon yml file does not exist. Provided path '{}'.".format( + path_poseidon_yml + ) + ) + ## Read in yaml file and set attributes. + self.yaml_data = yaml.safe_load(open(path_poseidon_yml)) + self.package_dir = os.path.dirname(path_poseidon_yml) + self.poseidon_version = self.yaml_data["poseidonVersion"] + self.title = self.yaml_data["title"] + self.description = self.yaml_data["description"] + self.contributor = self.yaml_data["contributor"] + self.package_version = self.yaml_data["packageVersion"] + self.last_modified = self.yaml_data["lastModified"] + self.janno_file = os.path.join(self.package_dir, self.yaml_data["jannoFile"]) + ## For each key-value pair in dict, check if key is genoFile. If so, join the package_dir to the value. Else, just add the value. + ## Also convert keys from camelCase to snake_case. + genotype_data = {} + for key, value in self.yaml_data["genotypeData"].items(): + if key.endswith("File"): + genotype_data[camel_to_snake(key)] = os.path.join( + self.package_dir, value + ) + else: + genotype_data[camel_to_snake(key)] = value + ## Genotype data is a dictionary in itself + GenotypeDict = namedtuple("GenotypeDict", " ".join(genotype_data.keys())) + self.genotype_data = GenotypeDict(**genotype_data) + ## Optional attributes + for attribute in [ + "janno_file_chk_sum", + "sequencing_source_file", + "sequencing_source_file_chk_sum", + "bib_file", + "bib_file_chk_sum", + "changelog_file", + ]: + try: + if attribute.endswith("File"): + setattr( + self, + attribute, + os.path.join(self.package_dir, self.yaml_data[attribute]), + ) + else: + setattr(self, attribute, self.yaml_data[attribute]) + except: + setattr(self, attribute, None) + + +## Function to calculate weighted mean of a group from the weight and value columns specified. 
+def weighted_mean( + group, wt_col="wt", val_col="val", filter_col="filter_col", min_val=100 +): + non_nan_indices = ~group[val_col].isna() + filter_indices = group[filter_col] >= min_val ## Remove values where 'filter_col' is below the cutoff + valid_indices = non_nan_indices & filter_indices + if valid_indices.any(): + weighted_values = ( + group.loc[valid_indices, wt_col] * group.loc[valid_indices, val_col] + ) + total_weight = group.loc[ + valid_indices, wt_col + ].sum() # Calculate total weight without excluded weights + weighted_mean = weighted_values.sum() / total_weight + else: + weighted_mean = np.nan # Return NaN if no valid values left + return weighted_mean + + +parser = argparse.ArgumentParser( + prog="populate_janno", + description="This script reads in different nf-core/eager result files and " + "uses this information to populate the relevant fields in a " + "poseidon janno file. The janno file and .ind/.fam file of the " + "package are updated, unless the --safe option is provided, in " + "which case the output files get the suffix '.new'.", +) + +parser.add_argument( + "-r", + "--eager_result_dir", + metavar="", + required=True, + help="The nf-core/eager result directory for the minotaur package.", +) +parser.add_argument( + "-t", + "--eager_tsv_path", + metavar="", + required=True, + help="The path to the eager input TSV used to generate the nf-core/eager results.", +) +parser.add_argument( + "-p", + "--poseidon_yml_path", + metavar="", + required=True, + help="The poseidon yml file for the package.", +) +parser.add_argument( + "--safe", + action="store_true", + help="Activate safe mode. The package's janno and ind files will not be updated, but instead new files will be created with the '.new' suffix. Only useful for testing.", +) +parser.add_argument("-v", "--version", action="version", version=VERSION) + +args = parser.parse_args() + +## Collect paths for analyses with multiple jsons. +damageprofiler_json_paths = glob.glob( + os.path.join(args.eager_result_dir, "damageprofiler", "*", "*.json") +) +endorspy_json_paths = glob.glob( + os.path.join(args.eager_result_dir, "endorspy", "*.json") +) +snp_coverage_json_paths = glob.glob( + os.path.join(args.eager_result_dir, "genotyping", "*.json") +) + +## Collect paths for analyses with single json. +sexdeterrmine_json_path = os.path.join( + args.eager_result_dir, "sex_determination", "sexdeterrmine.json" +) +nuclear_contamination_json_path = os.path.join( + args.eager_result_dir, "nuclear_contamination", "nuclear_contamination_mqc.json" +) + +## Read in all JSONs into pandas DataFrames.
+damage_table = pyEager.wrappers.compile_damage_table(damageprofiler_json_paths) +endogenous_table = pyEager.wrappers.compile_endogenous_table(endorspy_json_paths) +snp_coverage_table = pyEager.wrappers.compile_snp_coverage_table( + snp_coverage_json_paths +) +contamination_table = pyEager.parsers.parse_nuclear_contamination_json( + nuclear_contamination_json_path +) +sex_determination_table = pyEager.parsers.parse_sexdeterrmine_json( + sexdeterrmine_json_path +) + +tsv_table = pyEager.parsers.parse_eager_tsv(args.eager_tsv_path) +tsv_table = pyEager.parsers.infer_merged_bam_names( + tsv_table, run_trim_bam=True, skip_deduplication=False +) + +## Read poseidon yaml, infer path to janno file and read janno file. +poseidon_yaml_data = PoseidonYaml(args.poseidon_yml_path) +janno_table = pd.read_table(poseidon_yaml_data.janno_file, dtype=str) +## Add Main_ID to janno table. That is the Poseidon_ID after removing minotaur processing related suffixes. +janno_table["Eager_ID"] = janno_table["Poseidon_ID"].str.replace(r"_MNT", "") +janno_table["Main_ID"] = janno_table["Eager_ID"].str.replace(r"_ss", "") + +## Prepare damage table for joining. Infer eager Library_ID from id column, by removing '_rmdup.bam' suffix +## TODO-dev Check if this is the correct way to infer Library_ID from id column when the results are on the sample level. +damage_table["Library_ID"] = damage_table["id"].str.replace(r"_rmdup.bam", "") +damage_table = damage_table[["Library_ID", "n_reads", "dmg_5p_1bp"]].rename( + columns={"dmg_5p_1bp": "damage"} +) + +## Prepare endogenous table for joining. Should be max value in cases where multiple libraries are merged. But also, should be SG data ONLY, which is unlikely to work well with ENA datasets where TF and SG reads might be merged. +## TODO-dev Decide on how to get keep only SG data. The SSF Could be used to filter. +## NOTE: for now, endogenous DNA is ignored, as inference of SG/TF is difficult. +endogenous_table = endogenous_table[["id", "endogenous_dna"]].rename( + columns={"id": "Library_ID", "endogenous_dna": "endogenous"} +) + +## Prepare SNP coverage table for joining. Should always be on the sample level, so only need to fix column names. +snp_coverage_table = snp_coverage_table.drop("Total_Snps", axis=1).rename( + columns={"id": "Sample_ID", "Covered_Snps": "Nr_SNPs"} +) + +## Prepare contamination table for joining. Always at library level. Only need to fix column names here. +contamination_table = contamination_table[ + ["id", "Num_SNPs", "Method1_ML_estimate", "Method1_ML_SE"] +].rename( + columns={ + "id": "Library_ID", + "Num_SNPs": "Contamination_Nr_SNPs", + "Method1_ML_estimate": "Contamination_Est", + "Method1_ML_SE": "Contamination_SE", + } +) +contamination_table["Contamination_Est"] = pd.to_numeric( + contamination_table["Contamination_Est"], errors="coerce" +) +contamination_table["Contamination_SE"] = pd.to_numeric( + contamination_table["Contamination_SE"], errors="coerce" +) + +## Prepare sex determination table for joining. Naming is sometimes at library and sometimes at sample-level, but results are always at sample level. +sex_determination_table = sex_determination_table[ + ["id", "RateX", "RateY", "RateErrX", "RateErrY"] +] + +## Merge all eager tables together (exclude endogenous for now). 
+compound_eager_table = ( + pd.DataFrame.merge( + tsv_table, + snp_coverage_table, + left_on="Sample_Name", + right_on="Sample_ID", + validate="many_to_one", + ) + .merge( + ## Add contamination results per Library_ID + contamination_table, + on="Library_ID", + validate="many_to_one", + ) + .merge( + ## Add 5p1 damage results per Library_ID + damage_table, + on="Library_ID", + validate="many_to_one", + ) + .merge( + ## Add sex determination results per Sample_ID + sex_determination_table, + left_on="sexdet_bam_name", + right_on="id", + validate="many_to_one", + ) + .drop( + ## Drop columns that are not relevant anymore + [ + "Lane", + "Colour_Chemistry", + "SeqType", + "Organism", + "Strandedness", + "UDG_Treatment", + "R1", + "R2", + "BAM", + "initial_merge", + "additional_merge", + "strandedness_clash", + "initial_bam_name", + "additional_bam_name", + "sexdet_bam_name", + "Sample_ID", + "id", + ], + axis=1, + ) + .drop_duplicates() +) + +summarised_stats = pd.DataFrame() +summarised_stats["Sample_Name"] = compound_eager_table["Sample_Name"].unique() +summarised_stats = ( + compound_eager_table.astype("string") + .groupby("Sample_Name")[["Contamination_Nr_SNPs"]] + .agg( + lambda x: "Nr Snps (per library): {}. Estimate and error are weighted means of values per library. Libraries with fewer than 100 SNPs used in contamination estimation were excluded.".format( + ";".join(x) + ) + ) + .rename(columns={"Contamination_Nr_SNPs": "Contamination_Note"}) + .merge(summarised_stats, on="Sample_Name", validate="one_to_one") +) + +## Add library names +compound_eager_table["Original_library_names"] = compound_eager_table.apply( + infer_library_name, axis=1, args=("Sample_Name", "Library_ID") +) +summarised_stats = ( + compound_eager_table.groupby("Sample_Name")[["Original_library_names"]] + .agg(lambda x: ";".join(x)) + .rename(columns={"Original_library_names": "Library_Names"}) + .merge(summarised_stats, on="Sample_Name", validate="one_to_one") +) + +summarised_stats = ( + compound_eager_table.groupby("Sample_Name")[["Library_ID"]] + .agg("nunique") + .rename(columns={"Library_ID": "Nr_Libraries"}) + .merge(summarised_stats, on="Sample_Name", validate="one_to_one") +) + +summarised_stats = ( + compound_eager_table.groupby("Sample_Name")[ + ["Contamination_Nr_SNPs", "Contamination_Est", "Contamination_SE", "n_reads"] + ] + .apply( + weighted_mean, + wt_col="n_reads", + val_col="Contamination_Est", + filter_col="Contamination_Nr_SNPs", + min_val=100, + ) + .reset_index("Sample_Name") + .rename(columns={0: "Contamination"}) + .merge(summarised_stats, on="Sample_Name", validate="one_to_one") +) + +summarised_stats = ( + compound_eager_table.groupby("Sample_Name")[ + ["Contamination_Nr_SNPs", "Contamination_Est", "Contamination_SE", "n_reads"] + ] + .apply( + weighted_mean, + wt_col="n_reads", + val_col="Contamination_SE", + filter_col="Contamination_Nr_SNPs", + min_val=100, + ) + .reset_index("Sample_Name") + .rename(columns={0: "Contamination_Err"}) + .merge(summarised_stats, on="Sample_Name", validate="one_to_one") +) + +## If Contamination column is not empty, add the contamination measure +summarised_stats["Contamination_Meas"] = summarised_stats.apply( + set_contamination_measure, axis=1 +) + +summarised_stats = ( + compound_eager_table.groupby("Sample_Name")[["damage", "n_reads"]] + .apply( + weighted_mean, + wt_col="n_reads", + val_col="damage", + filter_col="n_reads", + min_val=0, + ) ## filter on n_reads >= 0, i.e. no filtering. 
+ .reset_index("Sample_Name") + .rename(columns={0: "Damage"}) + .merge(summarised_stats, on="Sample_Name", validate="one_to_one") +) + +final_eager_table = compound_eager_table.merge( + summarised_stats, on="Sample_Name", validate="many_to_one" +).drop( + columns=[ + "Library_ID", + "Contamination_Nr_SNPs", + "Contamination_Est", + "Contamination_SE", + "n_reads", + "damage", + "Original_library_names", + ], +) + +filled_janno_table = janno_table.merge( + final_eager_table, left_on="Eager_ID", right_on="Sample_Name" +) +## TODO-dev need to infer Genetic_Sex from 'RateX', 'RateY', 'RateErrX', 'RateErrY' +for col in [ + "Nr_SNPs", + "Damage", + "Contamination_Err", + "Contamination", + "Nr_Libraries", + "Contamination_Note", + "Library_Names", + "Contamination_Meas", +]: + filled_janno_table[col] = ( + filled_janno_table[[col + "_x", col + "_y"]].bfill(axis=1).iloc[:, 0] + ) + +## Drop columns duplicated from merges, and columns that are not relevant anymore. +filled_janno_table = filled_janno_table.drop( + list(filled_janno_table.filter(regex=r".*_(x|y)")), axis=1 +).drop("Sample_Name", axis=1) + +## Replace NAs with "n/a" +filled_janno_table.replace(np.nan, "n/a", inplace=True) + +final_column_order = [ + "Poseidon_ID", + "Genetic_Sex", + "Group_Name", + "Alternative_IDs", + "Main_ID", ## Added + "Relation_To", + "Relation_Degree", + "Relation_Type", + "Relation_Note", + "Collection_ID", + "Country", + "Country_ISO", + "Location", + "Site", + "Latitude", + "Longitude", + "Date_Type", + "Date_C14_Labnr", + "Date_C14_Uncal_BP", + "Date_C14_Uncal_BP_Err", + "Date_BC_AD_Start", + "Date_BC_AD_Median", + "Date_BC_AD_Stop", + "Date_Note", + "MT_Haplogroup", + "Y_Haplogroup", + "Source_Tissue", + "Nr_Libraries", + "Library_Names", + "Capture_Type", + "UDG", + "Library_Built", + "Genotype_Ploidy", + "Data_Preparation_Pipeline_URL", + "Endogenous", + "Nr_SNPs", + "Coverage_on_Target_SNPs", + "Damage", + "Contamination", + "Contamination_Err", + "Contamination_Meas", + "Contamination_Note", + "Genetic_Source_Accession_IDs", + "Primary_Contact", + "Publication", + "Note", + "Keywords", + "Eager_ID", ## Added + "RateX", ## Added + "RateY", ## Added + "RateErrX", ## Added + "RateErrY", ## Added +] + +## Reorder columns to match desired order +filled_janno_table = filled_janno_table[final_column_order] + +if args.safe: + out_fn = f"{poseidon_yaml_data.janno_file}.new" + print(f"Safe mode is activated. 
Results saved in: {out_fn}") + filled_janno_table.to_csv(out_fn, sep="\t", index=False) +else: + filled_janno_table.to_csv(poseidon_yaml_data.janno_file, sep="\t", index=False) diff --git a/scripts/run_eager.sh b/scripts/run_eager.sh index 6e2ea49..84653df 100755 --- a/scripts/run_eager.sh +++ b/scripts/run_eager.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -uo pipefail -VERSION='0.2.0dev' +VERSION='0.2.1dev' TEMP=`getopt -q -o hvadD --long help,version,test,array,dry_run,debug: -n 'run_eager.sh' -- "$@"` eval set -- "$TEMP" @@ -51,7 +51,7 @@ nxf_path="/mnt/archgen/tools/nextflow/22.04.5.5708/" ## Use centarlly installed eager_version='2.4.6' root_eager_dir="${repo_dir}/eager/" root_package_dir="${repo_dir}/packages/" -nextflow_profiles="eva,archgen,medium_data,eva_local_paths" +nextflow_profiles="eva,archgen,big_data,eva_local_paths" tower_config="${repo_dir}/.nextflow_tower.conf" ## Optional tower.nf configuration array_temp_fn_dir="${repo_dir}/array_tempfiles" array_logs_dir="${repo_dir}/array_Logs" diff --git a/scripts/validate_downloaded_data.sh b/scripts/validate_downloaded_data.sh index c9dd6b4..6a8c748 100755 --- a/scripts/validate_downloaded_data.sh +++ b/scripts/validate_downloaded_data.sh @@ -1,12 +1,35 @@ #!/usr/bin/env bash set -uo pipefail ## Pipefail, complain on new unassigned variables. -VERSION='0.2.1dev' +VERSION='0.2.2dev' ## Load helper bash functions source $(dirname ${0})/source_me.sh +function Helptext() { + echo -ne "\t usage: ${0} [options] \n\n" + echo -ne "This validates that the md5sums for downloaded FastQ files match the ones in the SSF for the package, and creates symlinks for each line in the eager input TSV.\n\n" + echo -ne "Options:\n" + echo -ne "-h, --help\t\tPrint this text and exit.\n" + echo -ne "-v, --version\t\tPrint version and exit.\n" +} + +## Show helptext and exit if no arguments are provided +if [[ ${#@} -eq 0 ]]; then + Helptext + exit 0 +fi + +if [[ ${1} == '-v' || ${1} == '--version' ]]; then + echo "validate_downloaded_data.sh version: ${VERSION}" + exit 0 +elif [[ ${1} == '-h' || ${1} == '--help' ]]; then + Helptext + exit 0 +fi + ssf_file=$(readlink -f ${1}) download_dir=$(readlink -f ${2}) -symlink_dir=$(readlink -f ${3}) +package_eager_dir=$(readlink -f ${3}) +symlink_dir=${package_eager_dir}/data md5sum_file="${download_dir}/expected_md5sums.txt" newest_fastq=$(ls -Art -1 ${download_dir}/*q.gz | tail -n 1) ## Reverse order and tail to avoid broken pipe errors script_debug_string="[validate_downloaded_data.sh]:"