From 962b54b7c430e40b3c6bd39ff249a65b2ff36afb Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Tue, 23 May 2023 20:05:31 +0100 Subject: [PATCH] Anopheles refactor part 6 - haplotype data (#401) * wip refactor hap functions * break out params into modules * refactor all params * wip test hap_data * wip test hap_data * fix plotting samples maps * wip test hap_data * rework tests for speed * improve error message * prep for test migration * test coverage up * test missing metadata directly * remove cruft --- malariagen_data/af1.py | 7 +- malariagen_data/ag3.py | 11 +- malariagen_data/anoph/base.py | 221 +- malariagen_data/anoph/base_params.py | 220 + malariagen_data/anoph/dash_params.py | 25 + malariagen_data/anoph/frq_params.py | 67 + malariagen_data/anoph/fst_params.py | 16 + malariagen_data/anoph/g123_params.py | 39 + malariagen_data/anoph/genome_features.py | 70 +- malariagen_data/anoph/genome_sequence.py | 3 +- malariagen_data/anoph/gplt_params.py | 79 + malariagen_data/anoph/h12_params.py | 29 + malariagen_data/anoph/hap_data.py | 287 ++ malariagen_data/anoph/hap_params.py | 11 + malariagen_data/anoph/hapclust_params.py | 34 + malariagen_data/anoph/hapnet_params.py | 52 + malariagen_data/anoph/het_params.py | 77 + malariagen_data/anoph/ihs_params.py | 118 + malariagen_data/anoph/map_params.py | 53 + malariagen_data/anoph/pca_params.py | 64 + malariagen_data/anoph/plotly_params.py | 132 + malariagen_data/anoph/sample_metadata.py | 75 +- malariagen_data/anoph/snp_data.py | 3 +- malariagen_data/anopheles.py | 833 +--- malariagen_data/util.py | 16 +- notebooks/plot_samples.ipynb | 357 +- notebooks/spike_sim_test_data.ipynb | 3597 ++++++++++++++++- tests/anoph/conftest.py | 289 +- .../fixture/missing_metadata/config.json | 7 + .../fixture/missing_metadata/v3/manifest.tsv | 3 + .../AG1000G-AO/samples.admin_units.csv | 82 + .../AG1000G-AO/samples.cohorts.csv | 82 + .../AG1000G-AO/samples.taxa.csv | 82 + .../general/AG1000G-AO/samples.meta.csv | 82 + 
.../general/AG1000G-AO/wgs_snp_data.csv | 82 + .../general/AG1000G-BF-A/samples.meta.csv | 182 + .../general/AG1000G-BF-A/wgs_snp_data.csv | 182 + .../AG1000G-AO/samples.species_aim.csv | 82 + .../samples.admin_units.csv | 648 --- .../samples.cohorts.csv | 6 +- .../samples.taxa.csv | 648 --- .../samples.meta.csv | 6 +- .../samples.species_aim.csv | 6 +- tests/anoph/test_hap_data.py | 487 +++ tests/anoph/test_sample_metadata.py | 112 + tests/anoph/test_snp_data.py | 201 +- tests/test_af1.py | 165 +- tests/test_ag3.py | 169 +- tests/test_anopheles.py | 41 - 49 files changed, 7176 insertions(+), 2964 deletions(-) create mode 100644 malariagen_data/anoph/base_params.py create mode 100644 malariagen_data/anoph/dash_params.py create mode 100644 malariagen_data/anoph/frq_params.py create mode 100644 malariagen_data/anoph/fst_params.py create mode 100644 malariagen_data/anoph/g123_params.py create mode 100644 malariagen_data/anoph/gplt_params.py create mode 100644 malariagen_data/anoph/h12_params.py create mode 100644 malariagen_data/anoph/hap_data.py create mode 100644 malariagen_data/anoph/hap_params.py create mode 100644 malariagen_data/anoph/hapclust_params.py create mode 100644 malariagen_data/anoph/hapnet_params.py create mode 100644 malariagen_data/anoph/het_params.py create mode 100644 malariagen_data/anoph/ihs_params.py create mode 100644 malariagen_data/anoph/map_params.py create mode 100644 malariagen_data/anoph/pca_params.py create mode 100644 malariagen_data/anoph/plotly_params.py create mode 100644 tests/anoph/fixture/missing_metadata/config.json create mode 100644 tests/anoph/fixture/missing_metadata/v3/manifest.tsv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.admin_units.csv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.cohorts.csv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.taxa.csv 
create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/samples.meta.csv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/wgs_snp_data.csv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/samples.meta.csv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/wgs_snp_data.csv create mode 100644 tests/anoph/fixture/missing_metadata/v3/metadata/species_calls_aim_20220528/AG1000G-AO/samples.species_aim.csv delete mode 100644 tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.admin_units.csv delete mode 100644 tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.taxa.csv create mode 100644 tests/anoph/test_hap_data.py diff --git a/malariagen_data/af1.py b/malariagen_data/af1.py index c991a3889..c92962cca 100644 --- a/malariagen_data/af1.py +++ b/malariagen_data/af1.py @@ -8,9 +8,7 @@ MAJOR_VERSION_PATH = "v1.0" CONFIG_PATH = "v1.0-config.json" GCS_URL = "gs://vo_afun_release/" -SITE_ANNOTATIONS_ZARR_PATH = "reference/genome/idAnoFuneDA-416_04/Anopheles-funestus-DA-416_04_1_SEQANNOTATION.zarr" PCA_RESULTS_CACHE_NAME = "af1_pca_v1" -SNP_ALLELE_COUNTS_CACHE_NAME = "af1_snp_allele_counts_v2" FST_GWSS_CACHE_NAME = "af1_fst_gwss_v1" H12_CALIBRATION_CACHE_NAME = "af1_h12_calibration_v1" H12_GWSS_CACHE_NAME = "af1_h12_gwss_v1" @@ -72,9 +70,7 @@ class Af1(AnophelesDataResource): """ - _site_annotations_zarr_path = SITE_ANNOTATIONS_ZARR_PATH _pca_results_cache_name = PCA_RESULTS_CACHE_NAME - _snp_allele_counts_results_cache_name = SNP_ALLELE_COUNTS_CACHE_NAME _fst_gwss_results_cache_name = FST_GWSS_CACHE_NAME _h12_calibration_cache_name = H12_CALIBRATION_CACHE_NAME _h12_gwss_cache_name = H12_GWSS_CACHE_NAME @@ -82,8 +78,6 @@ class Af1(AnophelesDataResource): _g123_calibration_cache_name = G123_CALIBRATION_CACHE_NAME _h1x_gwss_cache_name = 
H1X_GWSS_CACHE_NAME _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME - phasing_analysis_ids = ("funestus",) - _default_phasing_analysis = "funestus" def __init__( self, @@ -107,6 +101,7 @@ def __init__( aim_metadata_dtype=None, site_filters_analysis=site_filters_analysis, default_site_mask="funestus", + default_phasing_analysis="funestus", bokeh_output_notebook=bokeh_output_notebook, results_cache=results_cache, log=log, diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index bce4d5690..083cfeecf 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -14,7 +14,7 @@ import malariagen_data # used for .__version__ -from .anoph.base import base_params +from .anoph import base_params from .anopheles import AnophelesDataResource, gplt_params from .util import ( DIM_SAMPLE, @@ -35,12 +35,8 @@ MAJOR_VERSION_PATH = "v3" CONFIG_PATH = "v3-config.json" GCS_URL = "gs://vo_agam_release/" -SITE_ANNOTATIONS_ZARR_PATH = ( - "reference/genome/agamp4/Anopheles-gambiae-PEST_SEQANNOTATION_AgamP4.12.zarr" -) DEFAULT_MAX_COVERAGE_VARIANCE = 0.2 PCA_RESULTS_CACHE_NAME = "ag3_pca_v1" -SNP_ALLELE_COUNTS_CACHE_NAME = "ag3_snp_allele_counts_v2" FST_GWSS_CACHE_NAME = "ag3_fst_gwss_v1" H12_CALIBRATION_CACHE_NAME = "ag3_h12_calibration_v1" H12_GWSS_CACHE_NAME = "ag3_h12_gwss_v1" @@ -107,9 +103,7 @@ class Ag3(AnophelesDataResource): """ virtual_contigs = "2RL", "3RL" - _site_annotations_zarr_path = SITE_ANNOTATIONS_ZARR_PATH _pca_results_cache_name = PCA_RESULTS_CACHE_NAME - _snp_allele_counts_results_cache_name = SNP_ALLELE_COUNTS_CACHE_NAME _fst_gwss_results_cache_name = FST_GWSS_CACHE_NAME _h12_calibration_cache_name = H12_CALIBRATION_CACHE_NAME _h12_gwss_cache_name = H12_GWSS_CACHE_NAME @@ -117,8 +111,6 @@ class Ag3(AnophelesDataResource): _g123_calibration_cache_name = G123_CALIBRATION_CACHE_NAME _h1x_gwss_cache_name = H1X_GWSS_CACHE_NAME _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME - phasing_analysis_ids = ("gamb_colu_arab", "gamb_colu", "arab") - _default_phasing_analysis 
= "gamb_colu_arab" def __init__( self, @@ -150,6 +142,7 @@ def __init__( }, site_filters_analysis=site_filters_analysis, default_site_mask="gamb_colu_arab", + default_phasing_analysis="gamb_colu_arab", bokeh_output_notebook=bokeh_output_notebook, results_cache=results_cache, log=log, diff --git a/malariagen_data/anoph/base.py b/malariagen_data/anoph/base.py index 9a3605f2b..44cc94aac 100644 --- a/malariagen_data/anoph/base.py +++ b/malariagen_data/anoph/base.py @@ -4,7 +4,6 @@ IO, Any, Dict, - Final, Iterable, List, Literal, @@ -21,7 +20,6 @@ from numpydoc_decorator import doc from tqdm.auto import tqdm from tqdm.dask import TqdmCallback -from typing_extensions import Annotated, TypeAlias from ..util import ( CacheMiss, @@ -30,225 +28,8 @@ check_types, hash_params, init_filesystem, - region_param_type, - single_region_param_type, ) - -DEFAULT: Final[str] = "default" - - -class base_params: - """Parameter definitions common to many functions.""" - - contig: TypeAlias = Annotated[ - str, - """ - Reference genome contig name. See the `contigs` property for valid contig - names. - """, - ] - - single_region: TypeAlias = Annotated[ - single_region_param_type, - """ - Region of the reference genome. Can be a contig name, region string - (formatted like "{contig}:{start}-{end}"), or identifier of a genome - feature such as a gene or transcript. - """, - ] - - region: TypeAlias = Annotated[ - region_param_type, - """ - Region of the reference genome. Can be a contig name, region string - (formatted like "{contig}:{start}-{end}"), or identifier of a genome - feature such as a gene or transcript. Can also be a sequence (e.g., list) - of regions. - """, - ] - - release: TypeAlias = Annotated[ - Union[str, Sequence[str]], - "Release version identifier.", - ] - - sample_set: TypeAlias = Annotated[ - str, - "Sample set identifier.", - ] - - sample_sets: TypeAlias = Annotated[ - Union[Sequence[str], str], - """ - List of sample sets and/or releases. 
Can also be a single sample set or - release. - """, - ] - - sample_query: TypeAlias = Annotated[ - str, - """ - A pandas query string to be evaluated against the sample metadata, to - select samples to be included in the returned data. - """, - ] - - sample_indices: TypeAlias = Annotated[ - List[int], - """ - Advanced usage parameter. A list of indices of samples to select, - corresponding to the order in which the samples are found within the - sample metadata. Either provide this parameter or sample_query, not - both. - """, - ] - - @staticmethod - def validate_sample_selection_params( - *, - sample_query: Optional[sample_query], - sample_indices: Optional[sample_indices], - ): - if sample_query is not None and sample_indices is not None: - raise ValueError( - "Please provide either sample_query or sample_indices, not both." - ) - - cohort1_query: TypeAlias = Annotated[ - str, - """ - A pandas query string to be evaluated against the sample metadata, - to select samples for the first cohort. - """, - ] - - cohort2_query: TypeAlias = Annotated[ - str, - """ - A pandas query string to be evaluated against the sample metadata, - to select samples for the second cohort. - """, - ] - - site_mask: TypeAlias = Annotated[ - str, - """ - Which site filters mask to apply. See the `site_mask_ids` property for - available values. - """, - ] - - site_class: TypeAlias = Annotated[ - str, - """ - Select sites belonging to one of the following classes: CDS_DEG_4, - (4-fold degenerate coding sites), CDS_DEG_2_SIMPLE (2-fold simple - degenerate coding sites), CDS_DEG_0 (non-degenerate coding sites), - INTRON_SHORT (introns shorter than 100 bp), INTRON_LONG (introns - longer than 200 bp), INTRON_SPLICE_5PRIME (intron within 2 bp of - 5' splice site), INTRON_SPLICE_3PRIME (intron within 2 bp of 3' - splice site), UTR_5PRIME (5' untranslated region), UTR_3PRIME (3' - untranslated region), INTERGENIC (intergenic, more than 10 kbp from - a gene). 
- """, - ] - - cohort_size: TypeAlias = Annotated[ - int, - """ - Randomly down-sample to this value if the number of samples in the - cohort is greater. Raise an error if the number of samples is less - than this value. - """, - ] - - min_cohort_size: TypeAlias = Annotated[ - int, - """ - Minimum cohort size. Raise an error if the number of samples is - less than this value. - """, - ] - - max_cohort_size: TypeAlias = Annotated[ - int, - """ - Randomly down-sample to this value if the number of samples in the - cohort is greater. - """, - ] - - random_seed: TypeAlias = Annotated[ - int, - "Random seed used for reproducible down-sampling.", - ] - - transcript: TypeAlias = Annotated[ - str, - "Gene transcript identifier.", - ] - - cohort: TypeAlias = Annotated[ - Union[str, Tuple[str, str]], - """ - Either a string giving one of the predefined cohort labels, or a - pair of strings giving a custom cohort label and a sample query. - """, - ] - - cohorts: TypeAlias = Annotated[ - Union[str, Mapping[str, str]], - """ - Either a string giving the name of a predefined cohort set (e.g., - "admin1_month") or a dict mapping custom cohort labels to sample - queries. - """, - ] - - n_jack: TypeAlias = Annotated[ - int, - """ - Number of blocks to divide the data into for the block jackknife - estimation of confidence intervals. N.B., larger is not necessarily - better. - """, - ] - - confidence_level: TypeAlias = Annotated[ - float, - """ - Confidence level to use for confidence interval calculation. E.g., 0.95 - means 95% confidence interval. - """, - ] - - field: TypeAlias = Annotated[str, "Name of array or column to access."] - - inline_array: TypeAlias = Annotated[ - bool, - "Passed through to dask `from_array()`.", - ] - - inline_array_default: inline_array = True - - chunks: TypeAlias = Annotated[ - Union[str, Tuple[int, ...]], - """ - If 'auto' let dask decide chunk size. If 'native' use native zarr - chunks. 
Also, can be a target size, e.g., '200 MiB', or a tuple of - integers. - """, - ] - - chunks_default: chunks = "native" - - gff_attributes: TypeAlias = Annotated[ - Optional[Union[Sequence[str], str]], - """ - GFF attribute keys to unpack into dataframe columns. Provide "*" to unpack all - attributes. - """, - ] +from . import base_params class AnophelesBase: diff --git a/malariagen_data/anoph/base_params.py b/malariagen_data/anoph/base_params.py new file mode 100644 index 000000000..0f44376fb --- /dev/null +++ b/malariagen_data/anoph/base_params.py @@ -0,0 +1,220 @@ +"""General parameters common to many functions in the public API.""" + +from typing import Final, List, Mapping, Optional, Sequence, Tuple, Union + +from typing_extensions import Annotated, TypeAlias + +from ..util import region_param_type, single_region_param_type + +contig: TypeAlias = Annotated[ + str, + """ + Reference genome contig name. See the `contigs` property for valid contig + names. + """, +] + +single_region: TypeAlias = Annotated[ + single_region_param_type, + """ + Region of the reference genome. Can be a contig name, region string + (formatted like "{contig}:{start}-{end}"), or identifier of a genome + feature such as a gene or transcript. + """, +] + +region: TypeAlias = Annotated[ + region_param_type, + """ + Region of the reference genome. Can be a contig name, region string + (formatted like "{contig}:{start}-{end}"), or identifier of a genome + feature such as a gene or transcript. Can also be a sequence (e.g., list) + of regions. + """, +] + +release: TypeAlias = Annotated[ + Union[str, Sequence[str]], + "Release version identifier.", +] + +sample_set: TypeAlias = Annotated[ + str, + "Sample set identifier.", +] + +sample_sets: TypeAlias = Annotated[ + Union[Sequence[str], str], + """ + List of sample sets and/or releases. Can also be a single sample set or + release. 
+ """, +] + +sample_query: TypeAlias = Annotated[ + str, + """ + A pandas query string to be evaluated against the sample metadata, to + select samples to be included in the returned data. + """, +] + +sample_indices: TypeAlias = Annotated[ + List[int], + """ + Advanced usage parameter. A list of indices of samples to select, + corresponding to the order in which the samples are found within the + sample metadata. Either provide this parameter or sample_query, not + both. + """, +] + + +def validate_sample_selection_params( + *, + sample_query: Optional[sample_query], + sample_indices: Optional[sample_indices], +): + if sample_query is not None and sample_indices is not None: + raise ValueError( + "Please provide either sample_query or sample_indices, not both." + ) + + +cohort1_query: TypeAlias = Annotated[ + str, + """ + A pandas query string to be evaluated against the sample metadata, + to select samples for the first cohort. + """, +] + +cohort2_query: TypeAlias = Annotated[ + str, + """ + A pandas query string to be evaluated against the sample metadata, + to select samples for the second cohort. + """, +] + +site_mask: TypeAlias = Annotated[ + str, + """ + Which site filters mask to apply. See the `site_mask_ids` property for + available values. + """, +] + +site_class: TypeAlias = Annotated[ + str, + """ + Select sites belonging to one of the following classes: CDS_DEG_4, + (4-fold degenerate coding sites), CDS_DEG_2_SIMPLE (2-fold simple + degenerate coding sites), CDS_DEG_0 (non-degenerate coding sites), + INTRON_SHORT (introns shorter than 100 bp), INTRON_LONG (introns + longer than 200 bp), INTRON_SPLICE_5PRIME (intron within 2 bp of + 5' splice site), INTRON_SPLICE_3PRIME (intron within 2 bp of 3' + splice site), UTR_5PRIME (5' untranslated region), UTR_3PRIME (3' + untranslated region), INTERGENIC (intergenic, more than 10 kbp from + a gene). 
+ """, +] + +cohort_size: TypeAlias = Annotated[ + int, + """ + Randomly down-sample to this value if the number of samples in the + cohort is greater. Raise an error if the number of samples is less + than this value. + """, +] + +min_cohort_size: TypeAlias = Annotated[ + int, + """ + Minimum cohort size. Raise an error if the number of samples is + less than this value. + """, +] + +max_cohort_size: TypeAlias = Annotated[ + int, + """ + Randomly down-sample to this value if the number of samples in the + cohort is greater. + """, +] + +random_seed: TypeAlias = Annotated[ + int, + "Random seed used for reproducible down-sampling.", +] + +transcript: TypeAlias = Annotated[ + str, + "Gene transcript identifier.", +] + +cohort: TypeAlias = Annotated[ + Union[str, Tuple[str, str]], + """ + Either a string giving one of the predefined cohort labels, or a + pair of strings giving a custom cohort label and a sample query. + """, +] + +cohorts: TypeAlias = Annotated[ + Union[str, Mapping[str, str]], + """ + Either a string giving the name of a predefined cohort set (e.g., + "admin1_month") or a dict mapping custom cohort labels to sample + queries. + """, +] + +n_jack: TypeAlias = Annotated[ + int, + """ + Number of blocks to divide the data into for the block jackknife + estimation of confidence intervals. N.B., larger is not necessarily + better. + """, +] + +confidence_level: TypeAlias = Annotated[ + float, + """ + Confidence level to use for confidence interval calculation. E.g., 0.95 + means 95% confidence interval. + """, +] + +field: TypeAlias = Annotated[str, "Name of array or column to access."] + +inline_array: TypeAlias = Annotated[ + bool, + "Passed through to dask `from_array()`.", +] + +inline_array_default: inline_array = True + +chunks: TypeAlias = Annotated[ + Union[str, Tuple[int, ...]], + """ + If 'auto' let dask decide chunk size. If 'native' use native zarr + chunks. Also, can be a target size, e.g., '200 MiB', or a tuple of + integers. 
+ """, +] + +chunks_default: chunks = "native" + +gff_attributes: TypeAlias = Annotated[ + Optional[Union[Sequence[str], str]], + """ + GFF attribute keys to unpack into dataframe columns. Provide "*" to unpack all + attributes. + """, +] + +DEFAULT: Final[str] = "default" diff --git a/malariagen_data/anoph/dash_params.py b/malariagen_data/anoph/dash_params.py new file mode 100644 index 000000000..91cc48504 --- /dev/null +++ b/malariagen_data/anoph/dash_params.py @@ -0,0 +1,25 @@ +"""Parameters for functions using plotly dash (e.g., haplotype networks).""" + +from typing import Literal, Union + +from typing_extensions import Annotated, TypeAlias + +height: TypeAlias = Annotated[int, "Height of the Dash app in pixels (px)."] + +width: TypeAlias = Annotated[Union[int, str], "Width of the Dash app."] + +server_mode: TypeAlias = Annotated[ + Literal["inline", "external", "jupyterlab"], + """ + Controls how the Jupyter Dash app will be launched. See + https://medium.com/plotly/introducing-jupyterdash-811f1f57c02e for + more information. + """, +] + +server_mode_default: server_mode = "inline" + +server_port: TypeAlias = Annotated[ + int, + "Manually override the port on which the Dash app will run.", +] diff --git a/malariagen_data/anoph/frq_params.py b/malariagen_data/anoph/frq_params.py new file mode 100644 index 000000000..417e507b0 --- /dev/null +++ b/malariagen_data/anoph/frq_params.py @@ -0,0 +1,67 @@ +"""Parameter definitions for functions computing and plotting allele frequencies.""" + +from typing import Literal + +import xarray as xr +from typing_extensions import Annotated, TypeAlias + +drop_invariant: TypeAlias = Annotated[ + bool, + """ + If True, drop variants not observed in the selected samples. + """, +] + +effects: TypeAlias = Annotated[bool, "If True, add SNP effect annotations."] + +area_by: TypeAlias = Annotated[ + str, + """ + Column name in the sample metadata to use to group samples spatially. 
E.g., + use "admin1_iso" or "admin1_name" to group by level 1 administrative + divisions, or use "admin2_name" to group by level 2 administrative + divisions. + """, +] + +period_by: TypeAlias = Annotated[ + Literal["year", "quarter", "month"], + "Length of time to group samples temporally.", +] + +variant_query: TypeAlias = Annotated[ + str, + "A pandas query to be evaluated against variants.", +] + +nobs_mode: TypeAlias = Annotated[ + Literal["called", "fixed"], + """ + Method for calculating the denominator when computing frequencies. If + "called" then use the number of called alleles, i.e., number of samples + with non-missing genotype calls multiplied by 2. If "fixed" then use the + number of samples multiplied by 2. + """, +] + +nobs_mode_default: nobs_mode = "called" + +ci_method: TypeAlias = Annotated[ + Literal["normal", "agresti_coull", "beta", "wilson", "binom_test"], + """ + Method to use for computing confidence intervals, passed through to + `statsmodels.stats.proportion.proportion_confint`. + """, +] + +ci_method_default: ci_method = "wilson" + +ds_frequencies_advanced: TypeAlias = Annotated[ + xr.Dataset, + """ + A dataset of variant frequencies, such as returned by + `snp_allele_frequencies_advanced()`, + `aa_allele_frequencies_advanced()` or + `gene_cnv_frequencies_advanced()`. + """, +] diff --git a/malariagen_data/anoph/fst_params.py b/malariagen_data/anoph/fst_params.py new file mode 100644 index 000000000..707d07d64 --- /dev/null +++ b/malariagen_data/anoph/fst_params.py @@ -0,0 +1,16 @@ +"""Parameter definitions for Fst functions.""" + +from typing import Optional + +from typing_extensions import Annotated, TypeAlias + +from . 
import base_params + +# N.B., window size can mean different things for different functions +window_size: TypeAlias = Annotated[ + int, + "The size of windows (number of sites) used to calculate statistics within.", +] +cohort_size_default: Optional[base_params.cohort_size] = None +min_cohort_size_default: base_params.min_cohort_size = 15 +max_cohort_size_default: base_params.max_cohort_size = 50 diff --git a/malariagen_data/anoph/g123_params.py b/malariagen_data/anoph/g123_params.py new file mode 100644 index 000000000..d58b711ef --- /dev/null +++ b/malariagen_data/anoph/g123_params.py @@ -0,0 +1,39 @@ +"""Parameter definitions for G123 analysis functions.""" + +from typing import Tuple + +from typing_extensions import Annotated, TypeAlias + +from . import base_params + +sites: TypeAlias = Annotated[ + str, + """ + Which sites to use: 'all' includes all sites that pass + site filters; 'segregating' includes only segregating sites for + the given cohort; or a phasing analysis identifier can be + provided to use sites from the haplotype data, which is an + approximation to finding segregating sites in the entire Ag3.0 + (gambiae complex) or Af1.0 (funestus) cohort. + """, +] + +window_sizes: TypeAlias = Annotated[ + Tuple[int, ...], + """ + The sizes of windows (number of sites) used to calculate statistics within. + """, +] + +window_sizes_default: window_sizes = (100, 200, 500, 1000, 2000, 5000, 10000, 20000) + +window_size: TypeAlias = Annotated[ + int, + """ + The size of windows (number of sites) used to calculate statistics within. 
+ """, +] + +min_cohort_size_default: base_params.min_cohort_size = 20 + +max_cohort_size_default: base_params.max_cohort_size = 50 diff --git a/malariagen_data/anoph/genome_features.py b/malariagen_data/anoph/genome_features.py index 60a5cd8f6..3368023a7 100644 --- a/malariagen_data/anoph/genome_features.py +++ b/malariagen_data/anoph/genome_features.py @@ -1,4 +1,4 @@ -from typing import Dict, Literal, Optional, Tuple, Union +from typing import Dict, Optional, Tuple import bokeh.models import bokeh.plotting @@ -6,7 +6,6 @@ import pandas as pd from numpydoc_decorator import doc from pandas.io.common import infer_compression -from typing_extensions import Annotated, TypeAlias from ..util import ( Region, @@ -16,74 +15,11 @@ read_gff3, unpack_gff3_attributes, ) -from .base import DEFAULT, base_params +from . import base_params, gplt_params +from .base_params import DEFAULT from .genome_sequence import AnophelesGenomeSequenceData -class gplt_params: - """Parameters for genome plotting functions. N.B., genome plots are always - plotted with bokeh.""" - - sizing_mode: TypeAlias = Annotated[ - Literal[ - "fixed", - "stretch_width", - "stretch_height", - "stretch_both", - "scale_width", - "scale_height", - "scale_both", - ], - """ - Bokeh plot sizing mode, see also - https://docs.bokeh.org/en/latest/docs/user_guide/basic/layouts.html#sizing-modes - """, - ] - sizing_mode_default: sizing_mode = "stretch_width" - width: TypeAlias = Annotated[ - Optional[int], # always can be None - "Plot width in pixels (px).", - ] - width_default: width = None - height: TypeAlias = Annotated[ - int, - "Plot height in pixels (px).", - ] - track_height: TypeAlias = Annotated[ - int, - "Main track height in pixels (px).", - ] - genes_height: TypeAlias = Annotated[ - int, - "Genes track height in pixels (px).", - ] - genes_height_default: genes_height = 120 - show: TypeAlias = Annotated[ - bool, - "If true, show the plot. 
If False, do not show the plot, but return the figure.", - ] - toolbar_location: TypeAlias = Annotated[ - Literal["above", "below", "left", "right"], - "Location of bokeh toolbar.", - ] - toolbar_location_default: toolbar_location = "above" - x_range: TypeAlias = Annotated[ - bokeh.models.Range, - "X axis range (for linking to other tracks).", - ] - title: TypeAlias = Annotated[ - Union[str, bool], - "Plot title. If True, a title may be automatically generated.", - ] - figure: TypeAlias = Annotated[ - # Use quite a broad type here to accommodate both single-panel figures - # created via bokeh.plotting and multi-panel figures created via - # bokeh.layouts. - Optional[bokeh.model.Model], - "A bokeh figure (only returned if show=False).", - ] - - class AnophelesGenomeFeaturesData(AnophelesGenomeSequenceData): def __init__( self, diff --git a/malariagen_data/anoph/genome_sequence.py b/malariagen_data/anoph/genome_sequence.py index 860556a86..37bd26a50 100644 --- a/malariagen_data/anoph/genome_sequence.py +++ b/malariagen_data/anoph/genome_sequence.py @@ -11,7 +11,8 @@ init_zarr_store, parse_single_region, ) -from .base import AnophelesBase, base_params +from . import base_params +from .base import AnophelesBase class AnophelesGenomeSequenceData(AnophelesBase): diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py new file mode 100644 index 000000000..dba6390a3 --- /dev/null +++ b/malariagen_data/anoph/gplt_params.py @@ -0,0 +1,79 @@ +"""Parameters for genome plotting functions. 
N.B., genome plots are always +plotted with bokeh.""" + +from typing import Literal, Optional, Union + +import bokeh.models +from typing_extensions import Annotated, TypeAlias + +sizing_mode: TypeAlias = Annotated[ + Literal[ + "fixed", + "stretch_width", + "stretch_height", + "stretch_both", + "scale_width", + "scale_height", + "scale_both", + ], + """ + Bokeh plot sizing mode, see also + https://docs.bokeh.org/en/latest/docs/user_guide/basic/layouts.html#sizing-modes + """, +] + +sizing_mode_default: sizing_mode = "stretch_width" + +width: TypeAlias = Annotated[ + Optional[int], # always can be None + "Plot width in pixels (px).", +] + +width_default: width = None + +height: TypeAlias = Annotated[ + int, + "Plot height in pixels (px).", +] + +track_height: TypeAlias = Annotated[ + int, + "Main track height in pixels (px).", +] + +genes_height: TypeAlias = Annotated[ + int, + "Genes track height in pixels (px).", +] + +genes_height_default: genes_height = 120 + +show: TypeAlias = Annotated[ + bool, + "If true, show the plot. If False, do not show the plot, but return the figure.", +] + +toolbar_location: TypeAlias = Annotated[ + Literal["above", "below", "left", "right"], + "Location of bokeh toolbar.", +] + +toolbar_location_default: toolbar_location = "above" + +x_range: TypeAlias = Annotated[ + bokeh.models.Range, + "X axis range (for linking to other tracks).", +] + +title: TypeAlias = Annotated[ + Union[str, bool], + "Plot title. If True, a title may be automatically generated.", +] + +figure: TypeAlias = Annotated[ + # Use quite a broad type here to accommodate both single-panel figures + # created via bokeh.plotting and multi-panel figures created via + # bokeh.layouts. 
+ Optional[bokeh.model.Model], + "A bokeh figure (only returned if show=False).", +] diff --git a/malariagen_data/anoph/h12_params.py b/malariagen_data/anoph/h12_params.py new file mode 100644 index 000000000..564f87587 --- /dev/null +++ b/malariagen_data/anoph/h12_params.py @@ -0,0 +1,29 @@ +"""Parameter definitions for H12 analysis functions.""" + +from typing import Optional, Tuple + +from typing_extensions import Annotated, TypeAlias + +from . import base_params + +window_sizes: TypeAlias = Annotated[ + Tuple[int, ...], + """ + The sizes of windows (number of SNPs) used to calculate statistics within. + """, +] + +window_sizes_default: window_sizes = (100, 200, 500, 1000, 2000, 5000, 10000, 20000) + +window_size: TypeAlias = Annotated[ + int, + """ + The size of windows (number of SNPs) used to calculate statistics within. + """, +] + +cohort_size_default: Optional[base_params.cohort_size] = None + +min_cohort_size_default: base_params.min_cohort_size = 15 + +max_cohort_size_default: base_params.max_cohort_size = 50 diff --git a/malariagen_data/anoph/hap_data.py b/malariagen_data/anoph/hap_data.py new file mode 100644 index 000000000..51b41b105 --- /dev/null +++ b/malariagen_data/anoph/hap_data.py @@ -0,0 +1,287 @@ +from typing import Dict, List, Optional, Tuple + +import dask.array as da +import numpy as np +import xarray as xr +import zarr +from numpydoc_decorator import doc + +from ..util import ( + DIM_ALLELE, + DIM_PLOIDY, + DIM_SAMPLE, + DIM_VARIANT, + Region, + check_types, + da_from_zarr, + init_zarr_store, + locate_region, + parse_multi_region, + simple_xarray_concat, +) +from . 
import base_params, hap_params +from .base_params import DEFAULT +from .genome_features import AnophelesGenomeFeaturesData +from .genome_sequence import AnophelesGenomeSequenceData +from .sample_metadata import AnophelesSampleMetadata + + +class AnophelesHapData( + AnophelesSampleMetadata, AnophelesGenomeFeaturesData, AnophelesGenomeSequenceData +): + def __init__( + self, + default_phasing_analysis: Optional[str] = None, + **kwargs, + ): + # N.B., this class is designed to work cooperatively, and + # so it's important that any remaining parameters are passed + # to the superclass constructor. + super().__init__(**kwargs) + + # These will vary between data resources. + self._default_phasing_analysis = default_phasing_analysis + + # Set up caches. + self._cache_haplotypes: Dict = dict() + self._cache_haplotype_sites: Dict = dict() + + @property + def phasing_analysis_ids(self) -> Tuple[str, ...]: + """Identifiers for the different phasing analyses that are available. + These are values that can be used for the `analysis` parameter in any + method making use of haplotype data. + + """ + return tuple(self.config.get("PHASING_ANALYSIS_IDS", ())) # ensure tuple + + def _prep_phasing_analysis_param(self, *, analysis: hap_params.analysis): + if analysis == DEFAULT: + # Use whatever is the default phasing analysis for this data resource. + assert self._default_phasing_analysis is not None + return self._default_phasing_analysis + elif analysis in self.phasing_analysis_ids: + return analysis + else: + raise ValueError( + f"Invalid phasing analysis, must be one of f{self.phasing_analysis_ids}." 
+ ) + + @check_types + @doc( + summary="Open haplotype sites zarr.", + returns="Zarr hierarchy.", + ) + def open_haplotype_sites( + self, analysis: hap_params.analysis = DEFAULT + ) -> zarr.hierarchy.Group: + analysis = self._prep_phasing_analysis_param(analysis=analysis) + try: + return self._cache_haplotype_sites[analysis] + except KeyError: + path = f"{self._base_path}/{self._major_version_path}/snp_haplotypes/sites/{analysis}/zarr" + store = init_zarr_store(fs=self._fs, path=path) + root = zarr.open_consolidated(store=store) + self._cache_haplotype_sites[analysis] = root + return root + + def _haplotype_sites_for_contig( + self, *, contig, analysis, field, inline_array, chunks + ): + sites = self.open_haplotype_sites(analysis=analysis) + arr = sites[f"{contig}/variants/{field}"] + arr = da_from_zarr(arr, inline_array=inline_array, chunks=chunks) + return arr + + @check_types + @doc( + summary="Open haplotypes zarr.", + returns="Zarr hierarchy.", + ) + def open_haplotypes( + self, + sample_set: base_params.sample_set, + analysis: hap_params.analysis = DEFAULT, + ) -> Optional[zarr.hierarchy.Group]: + analysis = self._prep_phasing_analysis_param(analysis=analysis) + try: + return self._cache_haplotypes[(sample_set, analysis)] + except KeyError: + release = self.lookup_release(sample_set=sample_set) + release_path = self._release_to_path(release) + path = f"{self._base_path}/{release_path}/snp_haplotypes/{sample_set}/{analysis}/zarr" + store = init_zarr_store(fs=self._fs, path=path) + # Some sample sets have no data for a given analysis, handle this. + try: + root = zarr.open_consolidated(store=store) + except FileNotFoundError: + root = None + self._cache_haplotypes[(sample_set, analysis)] = root + return root + + def _haplotypes_for_contig( + self, *, contig, sample_set, analysis, inline_array, chunks + ): + # Open haplotypes zarr. 
+ root = self.open_haplotypes(sample_set=sample_set, analysis=analysis) + + # Some sample sets have no data for a given analysis, handle this. + if root is None: + return None + + # Open haplotype sites zarr. + sites = self.open_haplotype_sites(analysis=analysis) + + coords = dict() + data_vars = dict() + + # Set up variant_position. + pos = sites[f"{contig}/variants/POS"] + coords["variant_position"] = ( + [DIM_VARIANT], + da_from_zarr(pos, inline_array=inline_array, chunks=chunks), + ) + + # Set up variant_contig. + contig_index = self.contigs.index(contig) + coords["variant_contig"] = ( + [DIM_VARIANT], + da.full_like(pos, fill_value=contig_index, dtype="u1"), + ) + + # Set up variant_allele. + ref = da_from_zarr( + sites[f"{contig}/variants/REF"], inline_array=inline_array, chunks=chunks + ) + alt = da_from_zarr( + sites[f"{contig}/variants/ALT"], inline_array=inline_array, chunks=chunks + ) + variant_allele = da.hstack([ref[:, None], alt[:, None]]) + data_vars["variant_allele"] = [DIM_VARIANT, DIM_ALLELE], variant_allele + + # Set up call_genotype. + data_vars["call_genotype"] = ( + [DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY], + da_from_zarr( + root[f"{contig}/calldata/GT"], inline_array=inline_array, chunks=chunks + ), + ) + + # Set up sample array. + coords["sample_id"] = ( + [DIM_SAMPLE], + da_from_zarr(root["samples"], inline_array=inline_array, chunks=chunks), + ) + + # Set up attributes. + attrs = {"contigs": self.contigs, "analysis": analysis} + + # Create a dataset. 
+ ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + + return ds + + @check_types + @doc( + summary="Access haplotype data.", + returns="A dataset of haplotypes and associated data.", + ) + def haplotypes( + self, + region: base_params.region, + analysis: hap_params.analysis = DEFAULT, + sample_sets: Optional[base_params.sample_sets] = None, + sample_query: Optional[base_params.sample_query] = None, + inline_array: base_params.inline_array = base_params.inline_array_default, + chunks: base_params.chunks = base_params.chunks_default, + cohort_size: Optional[base_params.cohort_size] = None, + min_cohort_size: Optional[base_params.min_cohort_size] = None, + max_cohort_size: Optional[base_params.max_cohort_size] = None, + random_seed: base_params.random_seed = 42, + ) -> xr.Dataset: + # Normalise parameters. + sample_sets_prepped = self._prep_sample_sets_param(sample_sets=sample_sets) + del sample_sets + regions: List[Region] = parse_multi_region(self, region) + del region + analysis = self._prep_phasing_analysis_param(analysis=analysis) + + # Build dataset. + lx = [] + for r in regions: + ly = [] + + for s in sample_sets_prepped: + y = self._haplotypes_for_contig( + contig=r.contig, + sample_set=s, + analysis=analysis, + inline_array=inline_array, + chunks=chunks, + ) + if y is not None: + ly.append(y) + + if len(ly) == 0: + # Bail out, no data for given sample sets and analysis. + raise ValueError(f"No samples found for phasing analysis {analysis!r}") + + # Concatenate data from multiple sample sets. + x = simple_xarray_concat(ly, dim=DIM_SAMPLE) + + # Handle region. + if r.start or r.end: + pos = x["variant_position"].values + loc_region = locate_region(r, pos) + x = x.isel(variants=loc_region) + + lx.append(x) + + # Concatenate data from multiple regions. + ds = simple_xarray_concat(lx, dim=DIM_VARIANT) + + # Handle sample query. + if sample_query is not None: + # Load sample metadata. 
+ df_samples = self.sample_metadata(sample_sets=sample_sets_prepped) + + # Align sample metadata with haplotypes. + phased_samples = ds["sample_id"].values.tolist() + df_samples_phased = ( + df_samples.set_index("sample_id").loc[phased_samples].reset_index() + ) + + # Apply the query. + loc_samples = df_samples_phased.eval(sample_query).values + if np.count_nonzero(loc_samples) == 0: + # Bail out, no samples matching the query. + raise ValueError( + f"No samples found for phasing analysis {analysis!r} and query {sample_query!r}" + ) + ds = ds.isel(samples=loc_samples) + + if cohort_size is not None: + # Handle cohort size - overrides min and max. + min_cohort_size = cohort_size + max_cohort_size = cohort_size + + if min_cohort_size is not None: + # Handle min cohort size. + n_samples = ds.dims["samples"] + if n_samples < min_cohort_size: + raise ValueError( + f"Not enough samples ({n_samples}) for minimum cohort size ({min_cohort_size})" + ) + + if max_cohort_size is not None: + # Handle max cohort size. + n_samples = ds.dims["samples"] + if n_samples > max_cohort_size: + rng = np.random.default_rng(seed=random_seed) + loc_downsample = rng.choice( + n_samples, size=max_cohort_size, replace=False + ) + loc_downsample.sort() + ds = ds.isel(samples=loc_downsample) + + return ds diff --git a/malariagen_data/anoph/hap_params.py b/malariagen_data/anoph/hap_params.py new file mode 100644 index 000000000..6a8194ec8 --- /dev/null +++ b/malariagen_data/anoph/hap_params.py @@ -0,0 +1,11 @@ +"""Parameters common to functions accessing haplotype data.""" + +from typing_extensions import Annotated, TypeAlias + +analysis: TypeAlias = Annotated[ + str, + """ + Which haplotype phasing analysis to use. See the + `phasing_analysis_ids` property for available values. 
+ """, +] diff --git a/malariagen_data/anoph/hapclust_params.py b/malariagen_data/anoph/hapclust_params.py new file mode 100644 index 000000000..6de87ac94 --- /dev/null +++ b/malariagen_data/anoph/hapclust_params.py @@ -0,0 +1,34 @@ +"""Parameters for haplotype clustering functions.""" + +from typing import Literal + +from typing_extensions import Annotated, TypeAlias + +linkage_method: TypeAlias = Annotated[ + Literal["single", "complete", "average", "weighted", "centroid", "median", "ward"], + """ + The linkage algorithm to use. See the Linkage Methods section of the + scipy.cluster.hierarchy.linkage docs for full descriptions. + """, +] + +linkage_method_default: linkage_method = "single" + +count_sort: TypeAlias = Annotated[ + bool, + """ + For each node n, the order (visually, from left-to-right) n's two descendant + links are plotted is determined by this parameter. If True, the child with + the minimum number of original objects in its cluster is plotted first. Note + distance_sort and count_sort cannot both be True. + """, +] + +distance_sort: TypeAlias = Annotated[ + bool, + """ + For each node n, the order (visually, from left-to-right) n's two descendant + links are plotted is determined by this parameter. If True, The child with the + minimum distance between its direct descendants is plotted first. 
+ """, +] diff --git a/malariagen_data/anoph/hapnet_params.py b/malariagen_data/anoph/hapnet_params.py new file mode 100644 index 000000000..d716b0244 --- /dev/null +++ b/malariagen_data/anoph/hapnet_params.py @@ -0,0 +1,52 @@ +"""Parameters for haplotype network functions.""" + +from typing import List, Mapping + +from typing_extensions import Annotated, TypeAlias + +max_dist: TypeAlias = Annotated[ + int, + "Join network components up to a maximum distance of 2 SNP differences.", +] + +max_dist_default: max_dist = 2 + +color: TypeAlias = Annotated[ + str, + """ + Identifies a column in the sample metadata which determines the colour + of pie chart segments within nodes. + """, +] + +color_discrete_sequence: TypeAlias = Annotated[ + List, "Provide a list of colours to use." +] + +color_discrete_map: TypeAlias = Annotated[ + Mapping, "Provide an explicit mapping from values to colours." +] + +category_order: TypeAlias = Annotated[ + List, + "Control the order in which values appear in the legend.", +] + +node_size_factor: TypeAlias = Annotated[ + int, + "Control the sizing of nodes.", +] + +node_size_factor_default: node_size_factor = 50 + +layout: TypeAlias = Annotated[ + str, + "Name of the network layout to use to position nodes.", +] + +layout_default: layout = "cose" + +layout_params: TypeAlias = Annotated[ + Mapping, + "Additional parameters to the layout algorithm.", +] diff --git a/malariagen_data/anoph/het_params.py b/malariagen_data/anoph/het_params.py new file mode 100644 index 000000000..b2005c629 --- /dev/null +++ b/malariagen_data/anoph/het_params.py @@ -0,0 +1,77 @@ +"""Parameters for functions related to heterozygosity and runs of homozygosity.""" + +from typing import List, Mapping, Tuple, Union + +import pandas as pd +from typing_extensions import Annotated, TypeAlias + +single_sample: TypeAlias = Annotated[ + Union[str, int], + "Sample identifier or index within sample set.", +] + +sample: TypeAlias = Annotated[ + Union[single_sample, 
List[single_sample], Tuple[single_sample, ...]], + "Sample identifier or index within sample set. Multiple values can also be provided as a list or tuple.", +] + +window_size: TypeAlias = Annotated[ + int, + "Number of sites per window.", +] + +window_size_default: window_size = 20_000 + +phet_roh: TypeAlias = Annotated[ + float, + "Probability of observing a heterozygote in a ROH.", +] + +phet_roh_default: phet_roh = 0.001 + +phet_nonroh: TypeAlias = Annotated[ + Tuple[float, ...], + "One or more probabilities of observing a heterozygote outside a ROH.", +] + +phet_nonroh_default: phet_nonroh = (0.003, 0.01) + +transition: TypeAlias = Annotated[ + float, + """ + Probability of moving between states. A larger window size may call + for a larger transitional probability. + """, +] + +transition_default: transition = 0.001 + +y_max: TypeAlias = Annotated[ + float, + "Y axis limit.", +] + +y_max_default: y_max = 0.03 + +circle_kwargs: TypeAlias = Annotated[ + Mapping, + "Passed through to bokeh circle() function.", +] + +df_roh: TypeAlias = Annotated[ + pd.DataFrame, + """ + A DataFrame where each row provides data about a single run of + homozygosity. + """, +] + +heterozygosity_height: TypeAlias = Annotated[ + int, + "Height in pixels (px) of heterozygosity track.", +] + +roh_height: TypeAlias = Annotated[ + int, + "Height in pixels (px) of runs of homozygosity track.", +] diff --git a/malariagen_data/anoph/ihs_params.py b/malariagen_data/anoph/ihs_params.py new file mode 100644 index 000000000..f9eb1f928 --- /dev/null +++ b/malariagen_data/anoph/ihs_params.py @@ -0,0 +1,118 @@ +"""Parameter definitions for IHS analysis functions.""" + +from typing import Tuple, Union + +from typing_extensions import Annotated, TypeAlias + +from . import base_params + +window_size: TypeAlias = Annotated[ + int, + """ + The size of window in number of SNPs used to summarise iHS over. + If None, per-variant iHS values are returned. 
+ """, +] + +window_size_default: window_size = 200 + +min_cohort_size_default: base_params.min_cohort_size = 15 + +max_cohort_size_default: base_params.max_cohort_size = 50 + +percentiles: TypeAlias = Annotated[ + Union[int, Tuple[int, ...]], + """ + If window size is specified, this returns the iHS percentiles + for each window. + """, +] + +percentiles_default: percentiles = (50, 75, 100) + +standardize: TypeAlias = Annotated[ + bool, "If True, standardize iHS values by alternate allele counts." +] + +standardization_bins: TypeAlias = Annotated[ + Tuple[float, ...], + "If provided, use these allele count bins to standardize iHS values.", +] + +standardization_n_bins: TypeAlias = Annotated[ + int, + """ + Number of allele count bins to use for standardization. + Overrides standardization_bins. + """, +] + +standardization_n_bins_default: standardization_n_bins = 20 + +standardization_diagnostics: TypeAlias = Annotated[ + bool, "If True, plot some diagnostics about the standardization." +] + +filter_min_maf: TypeAlias = Annotated[ + float, + """ + Minimum minor allele frequency to use for filtering prior to passing + haplotypes to allel.ihs function + """, +] + +filter_min_maf_default: filter_min_maf = 0.05 + +compute_min_maf: TypeAlias = Annotated[ + float, + """ + Do not compute integrated haplotype homozygosity for variants with + minor allele frequency below this threshold. + """, +] + +compute_min_maf_default: compute_min_maf = 0.05 + +min_ehh: TypeAlias = Annotated[ + float, + """ + Minimum EHH beyond which to truncate integrated haplotype homozygosity + calculation. + """, +] + +min_ehh_default: min_ehh = 0.05 + +max_gap: TypeAlias = Annotated[ + int, + """ + Do not report scores if EHH spans a gap larger than this number of + base pairs. + """, +] + +max_gap_default: max_gap = 200_000 + +gap_scale: TypeAlias = Annotated[ + int, "Rescale distance between variants if gap is larger than this value." 
+] + +gap_scale_default: gap_scale = 20_000 + +include_edges: TypeAlias = Annotated[ + bool, + """ + If True, report scores even if EHH does not decay below min_ehh at the + end of the chromosome. + """, +] + +use_threads: TypeAlias = Annotated[ + bool, "If True, use multiple threads to compute iHS." +] + +palette: TypeAlias = Annotated[ + str, "Name of bokeh palette to use for plotting multiple percentiles." +] + +palette_default: palette = "Blues" diff --git a/malariagen_data/anoph/map_params.py b/malariagen_data/anoph/map_params.py new file mode 100644 index 000000000..6ce3fc88d --- /dev/null +++ b/malariagen_data/anoph/map_params.py @@ -0,0 +1,53 @@ +"""Parameters for functions plotting maps using ipyleaflet.""" + +from typing import Dict, Tuple, Union + +import ipyleaflet +import xyzservices +from typing_extensions import Annotated, TypeAlias + +center: TypeAlias = Annotated[ + Tuple[int, int], + "Location to center the map.", +] + +center_default: center = (-2, 20) + +zoom: TypeAlias = Annotated[int, "Initial zoom level."] + +zoom_default: zoom = 3 + +basemap_abbrevs = { + "mapnik": ipyleaflet.basemaps.OpenStreetMap.Mapnik, + "natgeoworldmap": ipyleaflet.basemaps.Esri.NatGeoWorldMap, + "opentopomap": ipyleaflet.basemaps.OpenTopoMap, + "positron": ipyleaflet.basemaps.CartoDB.Positron, + "satellite": ipyleaflet.basemaps.Gaode.Satellite, + "terrain": ipyleaflet.basemaps.Stamen.Terrain, + "watercolor": ipyleaflet.basemaps.Stamen.Watercolor, + "worldimagery": ipyleaflet.basemaps.Esri.WorldImagery, + "worldstreetmap": ipyleaflet.basemaps.Esri.WorldStreetMap, + "worldtopomap": ipyleaflet.basemaps.Esri.WorldTopoMap, +} + +basemap: TypeAlias = Annotated[ + Union[str, Dict, ipyleaflet.TileLayer, xyzservices.lib.TileProvider], + f""" + Basemap from ipyleaflet or other TileLayer provider. Strings are abbreviations mapped to corresponding + basemaps, available values are {list(basemap_abbrevs.keys())}. 
+ """, +] + +basemap_default: basemap = "mapnik" + +height: TypeAlias = Annotated[ + Union[int, str], "Height of the map in pixels (px) or other units." +] + +height_default: height = 500 + +width: TypeAlias = Annotated[ + Union[int, str], "Width of the map in pixels (px) or other units." +] + +width_default: width = "100%" diff --git a/malariagen_data/anoph/pca_params.py b/malariagen_data/anoph/pca_params.py new file mode 100644 index 000000000..74155c122 --- /dev/null +++ b/malariagen_data/anoph/pca_params.py @@ -0,0 +1,64 @@ +"""Parameters for PCA functions.""" + +import numpy as np +import pandas as pd +from typing_extensions import Annotated, TypeAlias + +n_snps: TypeAlias = Annotated[ + int, + """ + The desired number of SNPs to use when running the analysis. + SNPs will be evenly thinned to approximately this number. + """, +] + +thin_offset: TypeAlias = Annotated[ + int, + """ + Starting index for SNP thinning. Change this to repeat the analysis + using a different set of SNPs. + """, +] + +thin_offset_default: thin_offset = 0 + +min_minor_ac: TypeAlias = Annotated[ + int, + """ + The minimum minor allele count. SNPs with a minor allele count + below this value will be excluded prior to thinning. + """, +] + +min_minor_ac_default: min_minor_ac = 2 + +max_missing_an: TypeAlias = Annotated[ + int, + """ + The maximum number of missing allele calls to accept. SNPs with + more than this value will be excluded prior to thinning. Set to 0 + (default) to require no missing calls. + """, +] + +max_missing_an_default = 0 + +n_components: TypeAlias = Annotated[ + int, + "Number of components to return.", +] + +n_components_default: n_components = 20 + +df_pca: TypeAlias = Annotated[ + pd.DataFrame, + """ + A dataframe of sample metadata, with columns "PC1", "PC2", "PC3", + etc., added. 
+ """, +] + +evr: TypeAlias = Annotated[ + np.ndarray, + "An array of explained variance ratios, one per component.", +] diff --git a/malariagen_data/anoph/plotly_params.py b/malariagen_data/anoph/plotly_params.py new file mode 100644 index 000000000..d5396273c --- /dev/null +++ b/malariagen_data/anoph/plotly_params.py @@ -0,0 +1,132 @@ +"""Parameters for any plotting functions using plotly.""" + +# N.B., most of these parameters are always able to take None +# and so we set as Optional here, rather than having to repeat +# that for each function doc. + +from typing import List, Literal, Optional, Union + +import plotly.graph_objects as go +from typing_extensions import Annotated, TypeAlias + +x_label: TypeAlias = Annotated[ + Optional[str], + "X axis label.", +] + +y_label: TypeAlias = Annotated[ + Optional[str], + "Y axis label.", +] + +width: TypeAlias = Annotated[ + Optional[int], + "Plot width in pixels (px).", +] + +height: TypeAlias = Annotated[ + Optional[int], + "Plot height in pixels (px).", +] + +aspect: TypeAlias = Annotated[ + Optional[Literal["equal", "auto"]], + "Aspect ratio, see also https://plotly.com/python-api-reference/generated/plotly.express.imshow", +] + +title: TypeAlias = Annotated[ + Optional[Union[str, bool]], + """ + If True, attempt to use metadata from input dataset as a plot title. + Otherwise, use supplied value as a title. + """, +] + +text_auto: TypeAlias = Annotated[ + Union[bool, str], + """ + If True or a string, single-channel img values will be displayed as text. A + string like '.2f' will be interpreted as a texttemplate numeric formatting + directive. + """, +] + +color_continuous_scale: TypeAlias = Annotated[ + Optional[Union[str, List[str]]], + """ + Colormap used to map scalar data to colors (for a 2D image). This + parameter is not used for RGB or RGBA images. If a string is provided, + it should be the name of a known color scale, and if a list is provided, + it should be a list of CSS-compatible colors. 
+ """, +] + +colorbar: TypeAlias = Annotated[ + bool, + "If False, do not display a color bar.", +] + +x: TypeAlias = Annotated[ + str, + "Name of variable to plot on the X axis.", +] + +y: TypeAlias = Annotated[ + str, + "Name of variable to plot on the Y axis.", +] + +z: TypeAlias = Annotated[ + str, + "Name of variable to plot on the Z axis.", +] + +color: TypeAlias = Annotated[ + Optional[str], + "Name of variable to use to color the markers.", +] + +symbol: TypeAlias = Annotated[ + Optional[str], + "Name of the variable to use to choose marker symbols.", +] + +jitter_frac: TypeAlias = Annotated[ + Optional[float], + "Randomly jitter points by this fraction of their range.", +] + +marker_size: TypeAlias = Annotated[ + int, + "Marker size.", +] + +template: TypeAlias = Annotated[ + Optional[ + Literal[ + "ggplot2", + "seaborn", + "simple_white", + "plotly", + "plotly_white", + "plotly_dark", + "presentation", + "xgridoff", + "ygridoff", + "gridon", + "none", + ] + ], + "The figure template name (must be a key in plotly.io.templates).", +] + +show: TypeAlias = Annotated[ + bool, + "If true, show the plot. If False, do not show the plot, but return the figure.", +] + +renderer: TypeAlias = Annotated[Optional[str], "The name of the renderer to use."] + +figure: TypeAlias = Annotated[ + Optional[go.Figure], "A plotly figure (only returned if show=False)." 
+] diff --git a/malariagen_data/anoph/sample_metadata.py b/malariagen_data/anoph/sample_metadata.py index 2e847bece..1ecffbe57 100644 --- a/malariagen_data/anoph/sample_metadata.py +++ b/malariagen_data/anoph/sample_metadata.py @@ -4,64 +4,11 @@ import ipyleaflet import numpy as np import pandas as pd -import xyzservices from numpydoc_decorator import doc -from typing_extensions import Annotated, TypeAlias from ..util import check_types -from .base import AnophelesBase, base_params - - -class map_params: - center: TypeAlias = Annotated[ - Tuple[int, int], - "Location to center the map.", - ] - center_default: center = (-2, 20) - zoom: TypeAlias = Annotated[int, "Initial zoom level."] - zoom_default: zoom = 3 - basemap: TypeAlias = Annotated[ - Union[str, Dict, ipyleaflet.TileLayer, xyzservices.lib.TileProvider], - """ - Basemap from ipyleaflet or other TileLayer provider. Strings are abbreviations mapped to corresponding - basemaps, e.g. "mapnik" (case-insensitive) maps to TileProvider ipyleaflet.basemaps.OpenStreetMap.Mapnik. - """, - ] - basemap_default: basemap = "mapnik" - height: TypeAlias = Annotated[ - Union[int, str], "Height of the map in pixels (px) or other units." - ] - height_default: height = 500 - width: TypeAlias = Annotated[ - Union[int, str], "Width of the map in pixels (px) or other units." - ] - width_default: width = "100%" - - -def _get_basemap_abbrevs(): - """Get the dict of basemap abbreviations. - - Returns - ------- - basemap_abbrevs : dict - A dictionary where each key is a basemap abbreviation string, e.g. "mapnik", - and each value is a corresponding TileProvider, e.g. `ipyleaflet.basemaps.OpenStreetMap.Mapnik`. 
- """ - import ipyleaflet - - basemap_abbrevs = { - "mapnik": ipyleaflet.basemaps.OpenStreetMap.Mapnik, - "natgeoworldmap": ipyleaflet.basemaps.Esri.NatGeoWorldMap, - "opentopomap": ipyleaflet.basemaps.OpenTopoMap, - "positron": ipyleaflet.basemaps.CartoDB.Positron, - "satellite": ipyleaflet.basemaps.Gaode.Satellite, - "terrain": ipyleaflet.basemaps.Stamen.Terrain, - "watercolor": ipyleaflet.basemaps.Stamen.Watercolor, - "worldimagery": ipyleaflet.basemaps.Esri.WorldImagery, - "worldstreetmap": ipyleaflet.basemaps.Esri.WorldStreetMap, - "worldtopomap": ipyleaflet.basemaps.Esri.WorldTopoMap, - } - return basemap_abbrevs +from . import base_params, map_params +from .base import AnophelesBase class AnophelesSampleMetadata(AnophelesBase): @@ -509,10 +456,14 @@ def sample_metadata( df_samples = self.general_metadata(sample_sets=prepped_sample_sets) if self._aim_analysis: df_aim = self.aim_metadata(sample_sets=prepped_sample_sets) - df_samples = df_samples.merge(df_aim, on="sample_id", sort=False) + df_samples = df_samples.merge( + df_aim, on="sample_id", sort=False, how="left" + ) if self._cohorts_analysis: df_cohorts = self.cohorts_metadata(sample_sets=prepped_sample_sets) - df_samples = df_samples.merge(df_cohorts, on="sample_id", sort=False) + df_samples = df_samples.merge( + df_cohorts, on="sample_id", sort=False, how="left" + ) # Store sample metadata in the cache. self._cache_sample_metadata[cache_key] = df_samples @@ -644,16 +595,18 @@ def plot_samples_interactive_map( ) # Handle basemap. 
- basemap_providers_dict = _get_basemap_abbrevs() + basemap_abbrevs = map_params.basemap_abbrevs # Determine basemap_provider via basemap if isinstance(basemap, str): # Interpret string # Support case-insensitive basemap abbreviations basemap_str = basemap.lower() - if basemap_str not in basemap_providers_dict: - raise ValueError("Basemap abbreviation not recognised:", basemap_str) - basemap_provider = basemap_providers_dict[basemap_str] + if basemap_str not in basemap_abbrevs: + raise ValueError( + f"Basemap abbreviation not recognised: {basemap_str!r}; try one of {list(basemap_abbrevs.keys())}" + ) + basemap_provider = basemap_abbrevs[basemap_str] elif basemap is None: # Default. basemap_provider = ipyleaflet.basemaps.Esri.WorldImagery diff --git a/malariagen_data/anoph/snp_data.py b/malariagen_data/anoph/snp_data.py index 98694387b..73f5e43fa 100644 --- a/malariagen_data/anoph/snp_data.py +++ b/malariagen_data/anoph/snp_data.py @@ -28,7 +28,8 @@ simple_xarray_concat, true_runs, ) -from .base import DEFAULT, base_params +from . import base_params +from .base_params import DEFAULT from .genome_features import AnophelesGenomeFeaturesData, gplt_params from .genome_sequence import AnophelesGenomeSequenceData from .sample_metadata import AnophelesSampleMetadata diff --git a/malariagen_data/anopheles.py b/malariagen_data/anopheles.py index 838d1a86e..3015680ea 100644 --- a/malariagen_data/anopheles.py +++ b/malariagen_data/anopheles.py @@ -17,574 +17,48 @@ import plotly.express as px import plotly.graph_objects as go import xarray as xr -import zarr from numpydoc_decorator import doc -from typing_extensions import Annotated, Literal, TypeAlias from . 
import veff -from .anoph.base import DEFAULT, AnophelesBase, base_params -from .anoph.genome_features import AnophelesGenomeFeaturesData, gplt_params +from .anoph import ( + base_params, + dash_params, + frq_params, + fst_params, + g123_params, + gplt_params, + h12_params, + hapclust_params, + hapnet_params, + het_params, + ihs_params, + map_params, + pca_params, + plotly_params, +) +from .anoph.base import AnophelesBase +from .anoph.base_params import DEFAULT +from .anoph.genome_features import AnophelesGenomeFeaturesData from .anoph.genome_sequence import AnophelesGenomeSequenceData -from .anoph.sample_metadata import AnophelesSampleMetadata, map_params +from .anoph.hap_data import AnophelesHapData, hap_params +from .anoph.sample_metadata import AnophelesSampleMetadata from .anoph.snp_data import AnophelesSnpData from .mjn import median_joining_network, mjn_graph from .util import ( - DIM_ALLELE, - DIM_PLOIDY, - DIM_SAMPLE, - DIM_VARIANT, CacheMiss, Region, check_types, - da_from_zarr, - init_zarr_store, jackknife_ci, jitter, locate_region, - parse_multi_region, parse_single_region, plotly_discrete_legend, - simple_xarray_concat, ) AA_CHANGE_QUERY = ( "effect in ['NON_SYNONYMOUS_CODING', 'START_LOST', 'STOP_LOST', 'STOP_GAINED']" ) - -class hap_params: - """Parameter definitions for haplotype functions.""" - - analysis: TypeAlias = Annotated[ - str, - """ - Which haplotype phasing analysis to use. See the - `phasing_analysis_ids` property for available values. - """, - ] - - -class h12_params: - """Parameter definitions for H12 analysis functions.""" - - window_sizes: TypeAlias = Annotated[ - Tuple[int, ...], - """ - The sizes of windows (number of SNPs) used to calculate statistics within. - """, - ] - window_sizes_default: window_sizes = (100, 200, 500, 1000, 2000, 5000, 10000, 20000) - window_size: TypeAlias = Annotated[ - int, - """ - The size of windows (number of SNPs) used to calculate statistics within. 
- """, - ] - cohort_size_default: Optional[base_params.cohort_size] = None - min_cohort_size_default: base_params.min_cohort_size = 15 - max_cohort_size_default: base_params.max_cohort_size = 50 - - -class g123_params: - """Parameter definitions for G123 analysis functions.""" - - sites: TypeAlias = Annotated[ - str, - """ - Which sites to use: 'all' includes all sites that pass - site filters; 'segregating' includes only segregating sites for - the given cohort; or a phasing analysis identifier can be - provided to use sites from the haplotype data, which is an - approximation to finding segregating sites in the entire Ag3.0 - (gambiae complex) or Af1.0 (funestus) cohort. - """, - ] - window_sizes: TypeAlias = Annotated[ - Tuple[int, ...], - """ - The sizes of windows (number of sites) used to calculate statistics within. - """, - ] - window_sizes_default: window_sizes = (100, 200, 500, 1000, 2000, 5000, 10000, 20000) - window_size: TypeAlias = Annotated[ - int, - """ - The size of windows (number of sites) used to calculate statistics within. - """, - ] - min_cohort_size_default: base_params.min_cohort_size = 20 - max_cohort_size_default: base_params.max_cohort_size = 50 - - -class fst_params: - """Parameter definitions for Fst functions.""" - - # N.B., window size can mean different things for different functions - window_size: TypeAlias = Annotated[ - int, - "The size of windows (number of sites) used to calculate statistics within.", - ] - cohort_size_default: Optional[base_params.cohort_size] = None - min_cohort_size_default: base_params.min_cohort_size = 15 - max_cohort_size_default: base_params.max_cohort_size = 50 - - -class frq_params: - """Parameter definitions for functions computing and plotting allele frequencies.""" - - drop_invariant: TypeAlias = Annotated[ - bool, - """ - If True, drop variants not observed in the selected samples. 
- """, - ] - effects: TypeAlias = Annotated[bool, "If True, add SNP effect annotations."] - area_by: TypeAlias = Annotated[ - str, - """ - Column name in the sample metadata to use to group samples spatially. E.g., - use "admin1_iso" or "admin1_name" to group by level 1 administrative - divisions, or use "admin2_name" to group by level 2 administrative - divisions. - """, - ] - period_by: TypeAlias = Annotated[ - Literal["year", "quarter", "month"], - "Length of time to group samples temporally.", - ] - variant_query: TypeAlias = Annotated[ - str, - "A pandas query to be evaluated against variants.", - ] - nobs_mode: TypeAlias = Annotated[ - Literal["called", "fixed"], - """ - Method for calculating the denominator when computing frequencies. If - "called" then use the number of called alleles, i.e., number of samples - with non-missing genotype calls multiplied by 2. If "fixed" then use the - number of samples multiplied by 2. - """, - ] - nobs_mode_default: nobs_mode = "called" - ci_method: TypeAlias = Annotated[ - Literal["normal", "agresti_coull", "beta", "wilson", "binom_test"], - """ - Method to use for computing confidence intervals, passed through to - `statsmodels.stats.proportion.proportion_confint`. - """, - ] - ci_method_default: ci_method = "wilson" - ds_frequencies_advanced: TypeAlias = Annotated[ - xr.Dataset, - """ - A dataset of variant frequencies, such as returned by - `snp_allele_frequencies_advanced()`, - `aa_allele_frequencies_advanced()` or - `gene_cnv_frequencies_advanced()`. - """, - ] - - -class het_params: - """Parameters for functions related to heterozygosity and runs of homozygosity.""" - - single_sample: TypeAlias = Annotated[ - Union[str, int], - "Sample identifier or index within sample set.", - ] - sample: TypeAlias = Annotated[ - Union[single_sample, List[single_sample], Tuple[single_sample, ...]], - "Sample identifier or index within sample set. 
Multiple values can also be provided as a list or tuple.", - ] - window_size: TypeAlias = Annotated[ - int, - "Number of sites per window.", - ] - window_size_default: window_size = 20_000 - phet_roh: TypeAlias = Annotated[ - float, - "Probability of observing a heterozygote in a ROH.", - ] - phet_roh_default: phet_roh = 0.001 - phet_nonroh: TypeAlias = Annotated[ - Tuple[float, ...], - "One or more probabilities of observing a heterozygote outside a ROH.", - ] - phet_nonroh_default: phet_nonroh = (0.003, 0.01) - transition: TypeAlias = Annotated[ - float, - """ - Probability of moving between states. A larger window size may call - for a larger transitional probability. - """, - ] - transition_default: transition = 0.001 - y_max: TypeAlias = Annotated[ - float, - "Y axis limit.", - ] - y_max_default: y_max = 0.03 - circle_kwargs: TypeAlias = Annotated[ - Mapping, - "Passed through to bokeh circle() function.", - ] - df_roh: TypeAlias = Annotated[ - pd.DataFrame, - """ - A DataFrame where each row provides data about a single run of - homozygosity. - """, - ] - heterozygosity_height: TypeAlias = Annotated[ - int, - "Height in pixels (px) of heterozygosity track.", - ] - roh_height: TypeAlias = Annotated[ - int, - "Height in pixels (px) of runs of homozygosity track.", - ] - - -class pca_params: - """Parameters for PCA functions.""" - - n_snps: TypeAlias = Annotated[ - int, - """ - The desired number of SNPs to use when running the analysis. - SNPs will be evenly thinned to approximately this number. - """, - ] - thin_offset: TypeAlias = Annotated[ - int, - """ - Starting index for SNP thinning. Change this to repeat the analysis - using a different set of SNPs. - """, - ] - thin_offset_default: thin_offset = 0 - min_minor_ac: TypeAlias = Annotated[ - int, - """ - The minimum minor allele count. SNPs with a minor allele count - below this value will be excluded prior to thinning. 
- """, - ] - min_minor_ac_default: min_minor_ac = 2 - max_missing_an: TypeAlias = Annotated[ - int, - """ - The maximum number of missing allele calls to accept. SNPs with - more than this value will be excluded prior to thinning. Set to 0 - (default) to require no missing calls. - """, - ] - max_missing_an_default = 0 - n_components: TypeAlias = Annotated[ - int, - "Number of components to return.", - ] - n_components_default: n_components = 20 - df_pca: TypeAlias = Annotated[ - pd.DataFrame, - """ - A dataframe of sample metadata, with columns "PC1", "PC2", "PC3", - etc., added. - """, - ] - evr: TypeAlias = Annotated[ - np.ndarray, - "An array of explained variance ratios, one per component.", - ] - - -class plotly_params: - """Parameters for any plotting functions using plotly.""" - - # N.B., most of these parameters are always able to take None - # and so we set as Optional here, rather than having to repeat - # that for each function doc. - - x_label: TypeAlias = Annotated[ - Optional[str], - "X axis label.", - ] - y_label: TypeAlias = Annotated[ - Optional[str], - "Y axis label.", - ] - width: TypeAlias = Annotated[ - Optional[int], - "Plot width in pixels (px).", - ] - height: TypeAlias = Annotated[ - Optional[int], - "Plot height in pixels (px).", - ] - aspect: TypeAlias = Annotated[ - Optional[Literal["equal", "auto"]], - "Aspect ratio, see also https://plotly.com/python-api-reference/generated/plotly.express.imshow", - ] - title: TypeAlias = Annotated[ - Optional[Union[str, bool]], - """ - If True, attempt to use metadata from input dataset as a plot title. - Otherwise, use supplied value as a title. - """, - ] - text_auto: TypeAlias = Annotated[ - Union[bool, str], - """ - If True or a string, single-channel img values will be displayed as text. A - string like '.2f' will be interpreted as a texttemplate numeric formatting - directive. 
- """, - ] - color_continuous_scale: TypeAlias = Annotated[ - Optional[Union[str, List[str]]], - """ - Colormap used to map scalar data to colors (for a 2D image). This - parameter is not used for RGB or RGBA images. If a string is provided, - it should be the name of a known color scale, and if a list is provided, - it should be a list of CSS-compatible colors. - """, - ] - colorbar: TypeAlias = Annotated[ - bool, - "If False, do not display a color bar.", - ] - x: TypeAlias = Annotated[ - str, - "Name of variable to plot on the X axis.", - ] - y: TypeAlias = Annotated[ - str, - "Name of variable to plot on the Y axis.", - ] - z: TypeAlias = Annotated[ - str, - "Name of variable to plot on the Z axis.", - ] - color: TypeAlias = Annotated[ - Optional[str], - "Name of variable to use to color the markers.", - ] - symbol: TypeAlias = Annotated[ - Optional[str], - "Name of the variable to use to choose marker symbols.", - ] - jitter_frac: TypeAlias = Annotated[ - Optional[float], - "Randomly jitter points by this fraction of their range.", - ] - marker_size: TypeAlias = Annotated[ - int, - "Marker size.", - ] - template: TypeAlias = Annotated[ - Optional[ - Literal[ - "ggplot2", - "seaborn", - "simple_white", - "plotly", - "plotly_white", - "plotly_dark", - "presentation", - "xgridoff", - "ygridoff", - "gridon", - "none", - ] - ], - "The figure template name (must be a key in plotly.io.templates).", - ] - show: TypeAlias = Annotated[ - bool, - "If true, show the plot. If False, do not show the plot, but return the figure.", - ] - renderer: TypeAlias = Annotated[Optional[str], "The name of the renderer to use."] - figure: TypeAlias = Annotated[ - Optional[go.Figure], "A plotly figure (only returned if show=False)." - ] - - -class ihs_params: - window_size: TypeAlias = Annotated[ - int, - """ - The size of window in number of SNPs used to summarise iHS over. - If None, per-variant iHS values are returned. 
- """, - ] - window_size_default: window_size = 200 - min_cohort_size_default: base_params.min_cohort_size = 15 - max_cohort_size_default: base_params.max_cohort_size = 50 - percentiles: TypeAlias = Annotated[ - Union[int, Tuple[int, ...]], - """ - If window size is specified, this returns the iHS percentiles - for each window. - """, - ] - percentiles_default: percentiles = (50, 75, 100) - standardize: TypeAlias = Annotated[ - bool, "If True, standardize iHS values by alternate allele counts." - ] - standardization_bins: TypeAlias = Annotated[ - Tuple[float, ...], - "If provided, use these allele count bins to standardize iHS values.", - ] - standardization_n_bins: TypeAlias = Annotated[ - int, - """ - Number of allele count bins to use for standardization. - Overrides standardization_bins. - """, - ] - standardization_n_bins_default: standardization_n_bins = 20 - standardization_diagnostics: TypeAlias = Annotated[ - bool, "If True, plot some diagnostics about the standardization." - ] - filter_min_maf: TypeAlias = Annotated[ - float, - """ - Minimum minor allele frequency to use for filtering prior to passing - haplotypes to allel.ihs function - """, - ] - filter_min_maf_default: filter_min_maf = 0.05 - compute_min_maf: TypeAlias = Annotated[ - float, - """ - Do not compute integrated haplotype homozygosity for variants with - minor allele frequency below this threshold. - """, - ] - compute_min_maf_default: compute_min_maf = 0.05 - min_ehh: TypeAlias = Annotated[ - float, - """ - Minimum EHH beyond which to truncate integrated haplotype homozygosity - calculation. - """, - ] - min_ehh_default: min_ehh = 0.05 - max_gap: TypeAlias = Annotated[ - int, - """ - Do not report scores if EHH spans a gap larger than this number of - base pairs. - """, - ] - max_gap_default: max_gap = 200_000 - gap_scale: TypeAlias = Annotated[ - int, "Rescale distance between variants if gap is larger than this value." 
- ] - gap_scale_default: gap_scale = 20_000 - include_edges: TypeAlias = Annotated[ - bool, - """ - If True, report scores even if EHH does not decay below min_ehh at the - end of the chromosome. - """, - ] - use_threads: TypeAlias = Annotated[ - bool, "If True, use multiple threads to compute iHS." - ] - palette: TypeAlias = Annotated[ - str, "Name of bokeh palette to use for plotting multiple percentiles." - ] - palette_default: palette = "Blues" - - -class hapclust_params: - linkage_method: TypeAlias = Annotated[ - Literal[ - "single", "complete", "average", "weighted", "centroid", "median", "ward" - ], - """ - The linkage algorithm to use. See the Linkage Methods section of the - scipy.cluster.hierarchy.linkage docs for full descriptions. - """, - ] - linkage_method_default: linkage_method = "single" - count_sort: TypeAlias = Annotated[ - bool, - """ - For each node n, the order (visually, from left-to-right) n's two descendant - links are plotted is determined by this parameter. If True, the child with - the minimum number of original objects in its cluster is plotted first. Note - distance_sort and count_sort cannot both be True. - """, - ] - distance_sort: TypeAlias = Annotated[ - bool, - """ - For each node n, the order (visually, from left-to-right) n's two descendant - links are plotted is determined by this parameter. If True, The child with the - minimum distance between its direct descendants is plotted first. - """, - ] - - -class hapnet_params: - max_dist: TypeAlias = Annotated[ - int, - "Join network components up to a maximum distance of 2 SNP differences.", - ] - max_dist_default: max_dist = 2 - color: TypeAlias = Annotated[ - str, - """ - Identifies a column in the sample metadata which determines the colour - of pie chart segments within nodes. - """, - ] - color_discrete_sequence: TypeAlias = Annotated[ - List, "Provide a list of colours to use." 
- ] - color_discrete_map: TypeAlias = Annotated[ - Mapping, "Provide an explicit mapping from values to colours." - ] - category_order: TypeAlias = Annotated[ - List, - "Control the order in which values appear in the legend.", - ] - node_size_factor: TypeAlias = Annotated[ - int, - "Control the sizing of nodes.", - ] - node_size_factor_default: node_size_factor = 50 - layout: TypeAlias = Annotated[ - str, - "Name of the network layout to use to position nodes.", - ] - layout_default: layout = "cose" - layout_params: TypeAlias = Annotated[ - Mapping, - "Additional parameters to the layout algorithm.", - ] - - -class dash_params: - height: TypeAlias = Annotated[int, "Height of the Dash app in pixels (px)."] - width: TypeAlias = Annotated[Union[int, str], "Width of the Dash app."] - server_mode: TypeAlias = Annotated[ - Literal["inline", "external", "jupyterlab"], - """ - Controls how the Jupyter Dash app will be launched. See - https://medium.com/plotly/introducing-jupyterdash-811f1f57c02e for - more information. - """, - ] - server_mode_default: server_mode = "inline" - server_port: TypeAlias = Annotated[ - int, - "Manually override the port on which the Dash app will run.", - ] - - # N.B., we are in the process of breaking up the AnophelesDataResource # class into multiple parent classes like AnophelesGenomeSequenceData # and AnophelesBase. 
This is work in progress, and further PRs are @@ -607,6 +81,7 @@ class dash_params: # work around pycharm failing to recognise that doc() is callable # noinspection PyCallingNonCallable class AnophelesDataResource( + AnophelesHapData, AnophelesSnpData, AnophelesSampleMetadata, AnophelesGenomeFeaturesData, @@ -624,6 +99,7 @@ def __init__( aim_metadata_dtype: Optional[Mapping[str, Any]], site_filters_analysis: Optional[str], default_site_mask: Optional[str], + default_phasing_analysis: Optional[str], bokeh_output_notebook: bool, results_cache: Optional[str], log, @@ -658,27 +134,19 @@ def __init__( aim_metadata_dtype=aim_metadata_dtype, site_filters_analysis=site_filters_analysis, default_site_mask=default_site_mask, + default_phasing_analysis=default_phasing_analysis, results_cache=results_cache, ) # set up caches # TODO review type annotations here, maybe can tighten self._cache_annotator = None - self._cache_site_annotations = None - self._cache_locate_site_class: Dict = dict() - self._cache_haplotypes: Dict = dict() - self._cache_haplotype_sites: Dict = dict() @property @abstractmethod def _pca_results_cache_name(self): raise NotImplementedError("Must override _pca_results_cache_name") - @property - @abstractmethod - def _snp_allele_counts_results_cache_name(self): - raise NotImplementedError("Must override _snp_allele_counts_results_cache_name") - @property @abstractmethod def _fst_gwss_results_cache_name(self): @@ -714,11 +182,6 @@ def _h1x_gwss_cache_name(self): def _ihs_gwss_cache_name(self): raise NotImplementedError("Must override _ihs_gwss_cache_name") - @property - @abstractmethod - def _site_annotations_zarr_path(self): - raise NotImplementedError("Must override _site_annotations_zarr_path") - @abstractmethod def _transcript_to_gene_name(self, transcript): # children may have different manual overrides. 
@@ -733,31 +196,6 @@ def _view_alignments_add_site_filters_tracks( "Must override _view_alignments_add_site_filters_tracks" ) - @property - @abstractmethod - def phasing_analysis_ids(self): - """Identifiers for the different phasing analyses that are available. - These are values than can be used for the `analysis` parameter in any - method making using of haplotype data. - - """ - # Not all children have the same phasing analysis IDs. - raise NotImplementedError("Must override _phasing_analysis_ids") - - @property - @abstractmethod - def _default_phasing_analysis(self): - raise NotImplementedError("Must override _default_phasing_analysis") - - def _prep_phasing_analysis_param(self, *, analysis): - if analysis == DEFAULT: - analysis = self._default_phasing_analysis - if analysis not in self.phasing_analysis_ids: - raise ValueError( - f"Invalid phasing analysis, must be one of f{self.phasing_analysis_ids}." - ) - return analysis - @check_types @doc( summary=""" @@ -3928,228 +3366,6 @@ def plot_fst_gwss( else: return fig - @check_types - @doc( - summary="Open haplotypes zarr.", - returns="Zarr hierarchy.", - ) - def open_haplotypes( - self, - sample_set: base_params.sample_set, - analysis: hap_params.analysis = DEFAULT, - ) -> Optional[zarr.hierarchy.Group]: - analysis = self._prep_phasing_analysis_param(analysis=analysis) - try: - return self._cache_haplotypes[(sample_set, analysis)] - except KeyError: - release = self.lookup_release(sample_set=sample_set) - release_path = self._release_to_path(release) - path = f"{self._base_path}/{release_path}/snp_haplotypes/{sample_set}/{analysis}/zarr" - store = init_zarr_store(fs=self._fs, path=path) - # Some sample sets have no data for a given analysis, handle this. 
- try: - root = zarr.open_consolidated(store=store) - except FileNotFoundError: - root = None - self._cache_haplotypes[(sample_set, analysis)] = root - return root - - @check_types - @doc( - summary="Open haplotype sites zarr.", - returns="Zarr hierarchy.", - ) - def open_haplotype_sites( - self, analysis: hap_params.analysis = DEFAULT - ) -> zarr.hierarchy.Group: - analysis = self._prep_phasing_analysis_param(analysis=analysis) - try: - return self._cache_haplotype_sites[analysis] - except KeyError: - path = f"{self._base_path}/{self._major_version_path}/snp_haplotypes/sites/{analysis}/zarr" - store = init_zarr_store(fs=self._fs, path=path) - root = zarr.open_consolidated(store=store) - self._cache_haplotype_sites[analysis] = root - return root - - def _haplotype_sites_for_contig( - self, *, contig, analysis, field, inline_array, chunks - ): - sites = self.open_haplotype_sites(analysis=analysis) - arr = sites[f"{contig}/variants/{field}"] - arr = da_from_zarr(arr, inline_array=inline_array, chunks=chunks) - return arr - - def _haplotypes_for_contig( - self, *, contig, sample_set, analysis, inline_array, chunks - ): - debug = self._log.debug - - debug("open zarr") - root = self.open_haplotypes(sample_set=sample_set, analysis=analysis) - sites = self.open_haplotype_sites(analysis=analysis) - - debug("variant_position") - pos = sites[f"{contig}/variants/POS"] - - # some sample sets have no data for a given analysis, handle this - # TODO consider returning a dataset with 0 length samples dimension instead, would - # probably simplify a lot of other logic - if root is None: - return None - - coords = dict() - data_vars = dict() - - coords["variant_position"] = ( - [DIM_VARIANT], - da_from_zarr(pos, inline_array=inline_array, chunks=chunks), - ) - - debug("variant_contig") - contig_index = self.contigs.index(contig) - coords["variant_contig"] = ( - [DIM_VARIANT], - da.full_like(pos, fill_value=contig_index, dtype="u1"), - ) - - debug("variant_allele") - ref = 
da_from_zarr( - sites[f"{contig}/variants/REF"], inline_array=inline_array, chunks=chunks - ) - alt = da_from_zarr( - sites[f"{contig}/variants/ALT"], inline_array=inline_array, chunks=chunks - ) - variant_allele = da.hstack([ref[:, None], alt[:, None]]) - data_vars["variant_allele"] = [DIM_VARIANT, DIM_ALLELE], variant_allele - - debug("call_genotype") - data_vars["call_genotype"] = ( - [DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY], - da_from_zarr( - root[f"{contig}/calldata/GT"], inline_array=inline_array, chunks=chunks - ), - ) - - debug("sample arrays") - coords["sample_id"] = ( - [DIM_SAMPLE], - da_from_zarr(root["samples"], inline_array=inline_array, chunks=chunks), - ) - - debug("set up attributes") - attrs = {"contigs": self.contigs} - - debug("create a dataset") - ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) - - return ds - - @check_types - @doc( - summary="Access haplotype data.", - returns="A dataset of haplotypes and associated data.", - ) - def haplotypes( - self, - region: base_params.region, - analysis: hap_params.analysis = DEFAULT, - sample_sets: Optional[base_params.sample_sets] = None, - sample_query: Optional[base_params.sample_query] = None, - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - cohort_size: Optional[base_params.cohort_size] = None, - min_cohort_size: Optional[base_params.min_cohort_size] = None, - max_cohort_size: Optional[base_params.max_cohort_size] = None, - random_seed: base_params.random_seed = 42, - ) -> Optional[xr.Dataset]: - debug = self._log.debug - - debug("normalise parameters") - sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) - regions: List[Region] = parse_multi_region(self, region) - del region - analysis = self._prep_phasing_analysis_param(analysis=analysis) - - debug("build dataset") - lx = [] - for r in regions: - ly = [] - - for s in sample_sets: - y = self._haplotypes_for_contig( - 
contig=r.contig, - sample_set=s, - analysis=analysis, - inline_array=inline_array, - chunks=chunks, - ) - if y is not None: - ly.append(y) - - if len(ly) == 0: - debug("early out, no data for given sample sets and analysis") - return None - - debug("concatenate data from multiple sample sets") - x = simple_xarray_concat(ly, dim=DIM_SAMPLE) - - debug("handle region") - if r.start or r.end: - pos = x["variant_position"].values - loc_region = locate_region(r, pos) - x = x.isel(variants=loc_region) - - lx.append(x) - - debug("concatenate data from multiple regions") - ds = simple_xarray_concat(lx, dim=DIM_VARIANT) - - debug("handle sample query") - if sample_query is not None: - debug("load sample metadata") - df_samples = self.sample_metadata(sample_sets=sample_sets) - - debug("align sample metadata with haplotypes") - phased_samples = ds["sample_id"].values.tolist() - df_samples_phased = ( - df_samples.set_index("sample_id").loc[phased_samples].reset_index() - ) - - debug("apply the query") - loc_samples = df_samples_phased.eval(sample_query).values - if np.count_nonzero(loc_samples) == 0: - raise ValueError(f"No samples found for query {sample_query!r}") - ds = ds.isel(samples=loc_samples) - - debug("handle cohort size") - if cohort_size is not None: - debug("handle cohort size") - # overrides min and max - min_cohort_size = cohort_size - max_cohort_size = cohort_size - - if min_cohort_size is not None: - debug("handle min cohort size") - n_samples = ds.dims["samples"] - if n_samples < min_cohort_size: - raise ValueError( - f"not enough samples ({n_samples}) for minimum cohort size ({min_cohort_size})" - ) - - if max_cohort_size is not None: - debug("handle max cohort size") - n_samples = ds.dims["samples"] - if n_samples > max_cohort_size: - rng = np.random.default_rng(seed=random_seed) - loc_downsample = rng.choice( - n_samples, size=max_cohort_size, replace=False - ) - loc_downsample.sort() - ds = ds.isel(samples=loc_downsample) - - return ds - @check_types @doc( 
summary="Generate h12 GWSS calibration data for different window sizes.", @@ -5293,6 +4509,7 @@ def g123_gwss( name = self._g123_gwss_cache_name if sites == DEFAULT: + assert self._default_phasing_analysis is not None sites = self._default_phasing_analysis valid_sites = self.phasing_analysis_ids + ("all", "segregating") if sites not in valid_sites: diff --git a/malariagen_data/util.py b/malariagen_data/util.py index c6deaa706..0094cdd89 100644 --- a/malariagen_data/util.py +++ b/malariagen_data/util.py @@ -378,13 +378,15 @@ def _handle_region_coords(resource, region): end = int(region_split[2].replace(",", "")) if contig not in _valid_contigs(resource): - raise ValueError(f"Contig {contig} does not exist in the dataset.") - elif ( - start < 0 - or end <= start - or end > resource.genome_sequence(region=contig).shape[0] - ): - raise ValueError("Provided genomic coordinates are not valid.") + raise ValueError( + f"The genomic region {region!r} is invalid because contig {contig!r} does not exist in the dataset." + ) + else: + contig_length = resource.genome_sequence(region=contig).shape[0] + if start < 1 or end < start or end > contig_length: + raise ValueError( + f"The genomic region {region!r} is invalid for contig {contig!r} with length {contig_length}." 
+ ) return Region(contig, start, end) diff --git a/notebooks/plot_samples.ipynb b/notebooks/plot_samples.ipynb index b0330199f..6b4556581 100644 --- a/notebooks/plot_samples.ipynb +++ b/notebooks/plot_samples.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -20,9 +20,108 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n // Clean up Bokeh references\n if (id != null && id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim();\n if (id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n }\n 
}\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return 
toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n 
element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.1.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MalariaGEN Ag3 API client
\n", + " Please note that data are subject to terms of use,\n", + " for more information see \n", + " the MalariaGEN website or contact data@malariagen.net.\n", + " See also the Ag3 API docs.\n", + "
\n", + " Storage URL\n", + " simplecache::gs://vo_agam_release
\n", + " Data releases available\n", + " 3.0
\n", + " Results cache\n", + " None
\n", + " Cohorts analysis\n", + " 20230223
\n", + " AIM analysis\n", + " 20220528
\n", + " Site filters analysis\n", + " dt_20200416
\n", + " Software version\n", + " malariagen_data 0.0.0
\n", + " Client location\n", + " unknown
\n", + " " + ], + "text/plain": [ + "\n", + "Storage URL : simplecache::gs://vo_agam_release\n", + "Data releases available : 3.0\n", + "Results cache : None\n", + "Cohorts analysis : 20230223\n", + "AIM analysis : 20220528\n", + "Site filters analysis : dt_20200416\n", + "Software version : malariagen_data 0.0.0\n", + "Client location : unknown\n", + "---\n", + "Please note that data are subject to terms of use,\n", + "for more information see https://www.malariagen.net/data\n", + "or contact data@malariagen.net. For API documentation see \n", + "https://malariagen.github.io/vector-data/ag3/api.html" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ag3 = malariagen_data.Ag3(\n", " \"simplecache::gs://vo_agam_release\",\n", @@ -34,9 +133,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f94ce545a7024e0db279c58331f45637", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map(center=[-2, 20], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ag3.plot_samples_interactive_map(\n", " sample_sets=[\"3.0\"],\n", @@ -46,9 +161,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dd3ca7d42b8745e6880e57d79db77774", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map(center=[-2, 20], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Demo using a different basemap 
provider\n", "# - The map background will appear grey if provision of tiles fails\n", @@ -61,19 +192,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['mapnik', 'natgeoworldmap', 'opentopomap', 'positron', 'satellite', 'terrain', 'watercolor', 'worldimagery', 'worldstreetmap', 'worldtopomap'])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# See the available basemap abbreviations\n", - "malariagen_data.anoph.sample_metadata._get_basemap_abbrevs().keys()" + "malariagen_data.anoph.map_params.basemap_abbrevs.keys()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cfca3d4c4aaa4e3f9eb869e861593613", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map(center=[-2, 20], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Demo using a basemap abbreviation, case-insensitive\n", "ag3.plot_samples_interactive_map(\n", @@ -92,9 +250,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render 
data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n // Clean up Bokeh references\n if (id != null && id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim();\n if (id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = 
document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? 
*/\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n 
element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.1.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MalariaGEN Af1 API client
\n", + " Please note that data are subject to terms of use,\n", + " for more information see \n", + " the MalariaGEN website or contact data@malariagen.net.\n", + "
\n", + " Storage URL\n", + " simplecache::gs://vo_afun_release
\n", + " Data releases available\n", + " 1.0
\n", + " Results cache\n", + " None
\n", + " Cohorts analysis\n", + " 20221129
\n", + " Site filters analysis\n", + " dt_20200416
\n", + " Software version\n", + " malariagen_data 0.0.0
\n", + " Client location\n", + " unknown
\n", + " " + ], + "text/plain": [ + "\n", + "Storage URL : simplecache::gs://vo_afun_release\n", + "Data releases available : 1.0\n", + "Results cache : None\n", + "Cohorts analysis : 20221129\n", + "Site filters analysis : dt_20200416\n", + "Software version : malariagen_data 0.0.0\n", + "Client location : unknown\n", + "---\n", + "Please note that data are subject to terms of use,\n", + "for more information see https://www.malariagen.net/data\n", + "or contact data@malariagen.net." + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "af1 = malariagen_data.Af1(\n", " \"simplecache::gs://vo_afun_release\",\n", @@ -107,16 +355,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b554ac09959e4e3da81b3012ae43dd50", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map(center=[-2, 20], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Demo using a basemap provider the same as one in its curated list\n", "af1.plot_samples_interactive_map(\n", @@ -124,6 +381,60 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method plot_samples_interactive_map in module malariagen_data.anoph.sample_metadata:\n", + "\n", + "plot_samples_interactive_map(sample_sets: Union[Union[Sequence[str], str], NoneType] = None, sample_query: Union[str, NoneType] = None, basemap: Union[Union[str, Dict, ipyleaflet.leaflet.TileLayer, xyzservices.lib.TileProvider], NoneType] = 'mapnik', center: 
Tuple[int, int] = (-2, 20), zoom: int = 3, height: Union[int, str] = 500, width: Union[int, str] = '100%', min_samples: int = 1, count_by: str = 'taxon') -> ipyleaflet.leaflet.Map method of malariagen_data.ag3.Ag3 instance\n", + " Plot an interactive map showing sampling locations using ipyleaflet.\n", + " \n", + " Parameters\n", + " ----------\n", + " sample_sets : sequence of str or str or None, optional\n", + " List of sample sets and/or releases. Can also be a single sample set\n", + " or release.\n", + " sample_query : str or None, optional\n", + " A pandas query string to be evaluated against the sample metadata, to\n", + " select samples to be included in the returned data.\n", + " basemap : str or Dict or TileLayer or TileProvider or None, optional, default: 'mapnik'\n", + " Basemap from ipyleaflet or other TileLayer provider. Strings are\n", + " abbreviations mapped to corresponding basemaps, available values are\n", + " ['mapnik', 'natgeoworldmap', 'opentopomap', 'positron', 'satellite',\n", + " 'terrain', 'watercolor', 'worldimagery', 'worldstreetmap',\n", + " 'worldtopomap'].\n", + " center : Tuple[int, int], optional, default: (-2, 20)\n", + " Location to center the map.\n", + " zoom : int, optional, default: 3\n", + " Initial zoom level.\n", + " height : int or str, optional, default: 500\n", + " Height of the map in pixels (px) or other units.\n", + " width : int or str, optional, default: '100%'\n", + " Width of the map in pixels (px) or other units.\n", + " min_samples : int, optional, default: 1\n", + " Minimum number of samples required to show a marker for a given\n", + " location.\n", + " count_by : str, optional, default: 'taxon'\n", + " Metadata column to report counts of samples by for each location.\n", + " \n", + " Returns\n", + " -------\n", + " Map\n", + " Ipyleaflet map widget.\n", + "\n" + ] + } + ], + "source": [ + "help(ag3.plot_samples_interactive_map)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git 
a/notebooks/spike_sim_test_data.ipynb b/notebooks/spike_sim_test_data.ipynb index 6960743b6..a53713d71 100644 --- a/notebooks/spike_sim_test_data.ipynb +++ b/notebooks/spike_sim_test_data.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -18,9 +18,7 @@ "import allel\n", "import numpy as np\n", "import plotly.express as px\n", - "import plotly.io as pio\n", - "\n", - "pio.templates.default = \"plotly_dark\"" + "import plotly.io as pio" ] }, { @@ -28,6 +26,114 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "pio.templates.default = \"plotly_dark\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n // Clean up Bokeh references\n if (id != null && id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + 
\"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim();\n if (id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = 
this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n 
element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.1.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MalariaGEN Ag3 API client
\n", + " Please note that data are subject to terms of use,\n", + " for more information see \n", + " the MalariaGEN website or contact data@malariagen.net.\n", + " See also the Ag3 API docs.\n", + "
\n", + " Storage URL\n", + " simplecache::gs://vo_agam_release
\n", + " Data releases available\n", + " 3.0
\n", + " Results cache\n", + " None
\n", + " Cohorts analysis\n", + " 20230223
\n", + " AIM analysis\n", + " 20220528
\n", + " Site filters analysis\n", + " dt_20200416
\n", + " Software version\n", + " malariagen_data 0.0.0
\n", + " Client location\n", + " unknown
\n", + " " + ], + "text/plain": [ + "\n", + "Storage URL : simplecache::gs://vo_agam_release\n", + "Data releases available : 3.0\n", + "Results cache : None\n", + "Cohorts analysis : 20230223\n", + "AIM analysis : 20220528\n", + "Site filters analysis : dt_20200416\n", + "Software version : malariagen_data 0.0.0\n", + "Client location : unknown\n", + "---\n", + "Please note that data are subject to terms of use,\n", + "for more information see https://www.malariagen.net/data\n", + "or contact data@malariagen.net. For API documentation see \n", + "https://malariagen.github.io/vector-data/ag3/api.html" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ag3 = malariagen_data.Ag3(\n", " \"simplecache::gs://vo_agam_release\",\n", @@ -38,9 +144,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n // Clean up Bokeh references\n if (id != null && id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n\n if (server_id 
!== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim();\n if (id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function 
register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n 
element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.1.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MalariaGEN Af1 API client
\n", + " Please note that data are subject to terms of use,\n", + " for more information see \n", + " the MalariaGEN website or contact data@malariagen.net.\n", + "
\n", + " Storage URL\n", + " simplecache::gs://vo_afun_release
\n", + " Data releases available\n", + " 1.0
\n", + " Results cache\n", + " None
\n", + " Cohorts analysis\n", + " 20221129
\n", + " Site filters analysis\n", + " dt_20200416
\n", + " Software version\n", + " malariagen_data 0.0.0
\n", + " Client location\n", + " unknown
\n", + " " + ], + "text/plain": [ + "\n", + "Storage URL : simplecache::gs://vo_afun_release\n", + "Data releases available : 1.0\n", + "Results cache : None\n", + "Cohorts analysis : 20221129\n", + "Site filters analysis : dt_20200416\n", + "Software version : malariagen_data 0.0.0\n", + "Client location : unknown\n", + "---\n", + "Please note that data are subject to terms of use,\n", + "for more information see https://www.malariagen.net/data\n", + "or contact data@malariagen.net." + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "af1 = malariagen_data.Af1(\n", " \"simplecache::gs://vo_afun_release\",\n", @@ -49,6 +245,3395 @@ "af1" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Haplotypes" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                             (variants: 40758473, alleles: 4,\n",
+       "                                         samples: 181, ploidy: 2)\n",
+       "Coordinates:\n",
+       "    variant_position                    (variants) int32 dask.array<chunksize=(524288,), meta=np.ndarray>\n",
+       "    variant_contig                      (variants) uint8 dask.array<chunksize=(524288,), meta=np.ndarray>\n",
+       "    sample_id                           (samples) <U24 dask.array<chunksize=(181,), meta=np.ndarray>\n",
+       "Dimensions without coordinates: variants, alleles, samples, ploidy\n",
+       "Data variables:\n",
+       "    variant_allele                      (variants, alleles) |S1 dask.array<chunksize=(524288, 1), meta=np.ndarray>\n",
+       "    variant_filter_pass_gamb_colu_arab  (variants) bool dask.array<chunksize=(300000,), meta=np.ndarray>\n",
+       "    variant_filter_pass_gamb_colu       (variants) bool dask.array<chunksize=(300000,), meta=np.ndarray>\n",
+       "    variant_filter_pass_arab            (variants) bool dask.array<chunksize=(300000,), meta=np.ndarray>\n",
+       "    call_genotype                       (variants, samples, ploidy) int8 dask.array<chunksize=(300000, 50, 2), meta=np.ndarray>\n",
+       "    call_GQ                             (variants, samples) int16 dask.array<chunksize=(300000, 50), meta=np.ndarray>\n",
+       "    call_MQ                             (variants, samples) int16 dask.array<chunksize=(300000, 50), meta=np.ndarray>\n",
+       "    call_AD                             (variants, samples, alleles) int16 dask.array<chunksize=(300000, 50, 4), meta=np.ndarray>\n",
+       "    call_genotype_mask                  (variants, samples, ploidy) bool dask.array<chunksize=(300000, 50, 2), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    contigs:  ('2R', '2L', '3R', '3L', 'X')
" + ], + "text/plain": [ + "\n", + "Dimensions: (variants: 40758473, alleles: 4,\n", + " samples: 181, ploidy: 2)\n", + "Coordinates:\n", + " variant_position (variants) int32 dask.array\n", + " variant_contig (variants) uint8 dask.array\n", + " sample_id (samples) \n", + "Dimensions without coordinates: variants, alleles, samples, ploidy\n", + "Data variables:\n", + " variant_allele (variants, alleles) |S1 dask.array\n", + " variant_filter_pass_gamb_colu_arab (variants) bool dask.array\n", + " variant_filter_pass_gamb_colu (variants) bool dask.array\n", + " variant_filter_pass_arab (variants) bool dask.array\n", + " call_genotype (variants, samples, ploidy) int8 dask.array\n", + " call_GQ (variants, samples) int16 dask.array\n", + " call_MQ (variants, samples) int16 dask.array\n", + " call_AD (variants, samples, alleles) int16 dask.array\n", + " call_genotype_mask (variants, samples, ploidy) bool dask.array\n", + "Attributes:\n", + " contigs: ('2R', '2L', '3R', '3L', 'X')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_snps = ag3.snp_calls(region=\"3L\", sample_sets=\"AG1000G-BF-A\")\n", + "ds_snps" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:           (variants: 10319068, alleles: 2, samples: 181, ploidy: 2)\n",
+       "Coordinates:\n",
+       "    variant_position  (variants) int32 dask.array<chunksize=(262144,), meta=np.ndarray>\n",
+       "    variant_contig    (variants) uint8 dask.array<chunksize=(10319068,), meta=np.ndarray>\n",
+       "    sample_id         (samples) object dask.array<chunksize=(181,), meta=np.ndarray>\n",
+       "Dimensions without coordinates: variants, alleles, samples, ploidy\n",
+       "Data variables:\n",
+       "    variant_allele    (variants, alleles) |S1 dask.array<chunksize=(262144, 1), meta=np.ndarray>\n",
+       "    call_genotype     (variants, samples, ploidy) int8 dask.array<chunksize=(262144, 64, 2), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    contigs:  ('2R', '2L', '3R', '3L', 'X')
" + ], + "text/plain": [ + "\n", + "Dimensions: (variants: 10319068, alleles: 2, samples: 181, ploidy: 2)\n", + "Coordinates:\n", + " variant_position (variants) int32 dask.array\n", + " variant_contig (variants) uint8 dask.array\n", + " sample_id (samples) object dask.array\n", + "Dimensions without coordinates: variants, alleles, samples, ploidy\n", + "Data variables:\n", + " variant_allele (variants, alleles) |S1 dask.array\n", + " call_genotype (variants, samples, ploidy) int8 dask.array\n", + "Attributes:\n", + " contigs: ('2R', '2L', '3R', '3L', 'X')" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps = ag3.haplotypes(\n", + " region=\"3L\", sample_sets=\"AG1000G-BF-A\", analysis=\"gamb_colu_arab\"\n", + ")\n", + "ds_haps" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.2531760206031271" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps.dims[\"variants\"] / ds_snps.dims[\"variants\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.27856833596292974" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps = ag3.haplotypes(region=\"3L\", sample_sets=\"AG1000G-BF-A\", analysis=\"gamb_colu\")\n", + "ds_haps.dims[\"variants\"] / ds_snps.dims[\"variants\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.09217013601073819" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps = ag3.haplotypes(region=\"3L\", sample_sets=\"AG1000G-UG\", analysis=\"arab\")\n", + "ds_haps.dims[\"variants\"] / ds_snps.dims[\"variants\"]" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[b'A', b'C', b'T', b'G'],\n", + " [b'C', b'A', b'T', b'G'],\n", + " [b'G', b'A', b'C', b'T'],\n", + " ...,\n", + " [b'T', b'A', b'C', b'G'],\n", + " [b'C', b'A', b'T', b'G'],\n", + " [b'C', b'A', b'T', b'G']], dtype='|S1')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "snp_alleles = ds_snps[\"variant_allele\"].values\n", + "snp_alleles" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 2, 3, ..., 41963433, 41963434, 41963435],\n", + " dtype=int32)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "snp_pos = ds_snps[\"variant_position\"].values\n", + "snp_pos" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, True, ..., False, True, False])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loc_hap_sites = np.random.choice([False, True], size=snp_pos.shape[0], p=[0.75, 0.25])\n", + "loc_hap_sites" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10192236" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_hap_sites = np.sum(loc_hap_sites)\n", + "n_hap_sites" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([b'G', b'A', b'G', ..., b'C', b'G', b'C'], dtype='|S1')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sim_hap_ref = snp_alleles[loc_hap_sites, 0]\n", + "sim_hap_ref" + ] + }, + { + 
"cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(1, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2, 3, 2, ..., 2, 3, 1])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sim_alt_choice = np.random.choice(np.arange(1, 4), size=n_hap_sites)\n", + "sim_alt_choice" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([b'C', b'G', b'C', ..., b'T', b'T', b'A'], dtype='|S1')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sim_hap_alt = np.take_along_axis(\n", + " snp_alleles[loc_hap_sites], indices=sim_alt_choice[:, None], axis=1\n", + ")[:, 0]\n", + "sim_hap_alt" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10192236,)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sim_hap_alt.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:           (variants: 10319068, alleles: 2, samples: 181, ploidy: 2)\n",
+       "Coordinates:\n",
+       "    variant_position  (variants) int32 dask.array<chunksize=(262144,), meta=np.ndarray>\n",
+       "    variant_contig    (variants) uint8 dask.array<chunksize=(10319068,), meta=np.ndarray>\n",
+       "    sample_id         (samples) object dask.array<chunksize=(181,), meta=np.ndarray>\n",
+       "Dimensions without coordinates: variants, alleles, samples, ploidy\n",
+       "Data variables:\n",
+       "    variant_allele    (variants, alleles) |S1 dask.array<chunksize=(262144, 1), meta=np.ndarray>\n",
+       "    call_genotype     (variants, samples, ploidy) int8 dask.array<chunksize=(262144, 64, 2), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    contigs:  ('2R', '2L', '3R', '3L', 'X')
" + ], + "text/plain": [ + "\n", + "Dimensions: (variants: 10319068, alleles: 2, samples: 181, ploidy: 2)\n", + "Coordinates:\n", + " variant_position (variants) int32 dask.array\n", + " variant_contig (variants) uint8 dask.array\n", + " sample_id (samples) object dask.array\n", + "Dimensions without coordinates: variants, alleles, samples, ploidy\n", + "Data variables:\n", + " variant_allele (variants, alleles) |S1 dask.array\n", + " call_genotype (variants, samples, ploidy) int8 dask.array\n", + "Attributes:\n", + " contigs: ('2R', '2L', '3R', '3L', 'X')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.9922709, 0.0077291])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps = ag3.haplotypes(\n", + " region=\"3L\", sample_sets=\"AG1000G-BF-A\", analysis=\"gamb_colu_arab\"\n", + ")\n", + "gt = ds_haps[\"call_genotype\"][:1_000_000].values\n", + "p_hap_01 = np.bincount(gt.flatten())\n", + "p_hap_01 = p_hap_01 / p_hap_01.sum()\n", + "p_hap_01" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.99058904, 0.00941096])" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_haps = ag3.haplotypes(region=\"3L\", sample_sets=\"AG1000G-BF-A\", analysis=\"gamb_colu\")\n", + "gt = ds_haps[\"call_genotype\"][:1_000_000].values\n", + "p_hap_01 = np.bincount(gt.flatten())\n", + "p_hap_01 = p_hap_01 / p_hap_01.sum()\n", + "p_hap_01" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.94134709, 0.05865291])" + ] + }, + "execution_count": 31, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "ds_haps = ag3.haplotypes(region=\"3L\", sample_sets=\"AG1000G-UG\", analysis=\"arab\")\n", + "gt = ds_haps[\"call_genotype\"][:1_000_000].values\n", + "p_hap_01 = np.bincount(gt.flatten())\n", + "p_hap_01 = p_hap_01 / p_hap_01.sum()\n", + "p_hap_01" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0]], dtype=int8)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hap_gt_sim = np.random.choice(\n", + " [0, 1], size=(100_000, 100), replace=True, p=p_hap_01\n", + ").astype(\"i1\")\n", + "hap_gt_sim" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9922335, 77665])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.bincount(hap_gt_sim.flatten())" + ] + }, { "attachments": {}, "cell_type": "markdown", diff --git a/tests/anoph/conftest.py b/tests/anoph/conftest.py index f649d35f2..8c0943ba4 100644 --- a/tests/anoph/conftest.py +++ b/tests/anoph/conftest.py @@ -346,7 +346,7 @@ def simulate_snp_sites(path, contigs, genome): n_sites[contig] = pos.shape[0] zarr.consolidate_metadata(path) - return n_sites + return root, n_sites def simulate_site_filters(path, contigs, p_pass, n_sites): @@ -496,6 +496,41 @@ def simulate_site_annotations(path, genome): zarr.consolidate_metadata(path) +def simulate_hap_sites(path, contigs, snp_sites, p_site): + n_sites = dict() + root = zarr.open(path, mode="w") + + for contig in contigs: + # Obtain variants group. 
+ variants = root.require_group(contig).require_group("variants") + + # Simulate POS. + snp_pos = snp_sites[f"{contig}/variants/POS"][:] + loc_hap_sites = np.random.choice( + [False, True], size=snp_pos.shape[0], p=[1 - p_site, p_site] + ) + pos = snp_pos[loc_hap_sites] + variants.create_dataset(name="POS", data=pos) + + # Simulate REF. + snp_ref = snp_sites[f"{contig}/variants/REF"][:] + ref = snp_ref[loc_hap_sites] + variants.create_dataset(name="REF", data=ref) + + # Simulate ALT. + snp_alt = snp_sites[f"{contig}/variants/ALT"][:] + sim_alt_choice = np.random.choice(3, size=pos.shape[0]) + alt = np.take_along_axis( + snp_alt[loc_hap_sites], indices=sim_alt_choice[:, None], axis=1 + )[:, 0] + variants.create_dataset(name="ALT", data=alt) + + n_sites[contig] = pos.shape[0] + + zarr.consolidate_metadata(path) + return root, n_sites + + class AnophelesSimulator: def __init__( self, @@ -541,6 +576,8 @@ def __init__( self.init_site_filters() self.init_snp_genotypes() self.init_site_annotations() + self.init_hap_sites() + self.init_haplotypes() @property def contigs(self) -> Tuple[str, ...]: @@ -587,6 +624,12 @@ def init_snp_genotypes(self): def init_site_annotations(self): pass + def init_hap_sites(self): + pass + + def init_haplotypes(self): + pass + class Ag3Simulator(AnophelesSimulator): def __init__(self, fixture_dir): @@ -629,7 +672,7 @@ def init_public_release_manifest(self): manifest = pd.DataFrame( { "sample_set": ["AG1000G-AO", "AG1000G-BF-A"], - "sample_count": [randint(10, 60), randint(10, 50)], + "sample_count": [randint(10, 50), randint(10, 40)], } ) manifest.to_csv(manifest_path, index=False, sep="\t") @@ -647,7 +690,8 @@ def init_pre_release_manifest(self): "sample_set": [ "1177-VO-ML-LEHMANN-VMF00004", ], - "sample_count": [randint(10, 70)], + # Make sure we have some gambiae, coluzzii and arabiensis. 
+ "sample_count": [randint(20, 60)], } ) manifest.to_csv(manifest_path, index=False, sep="\t") @@ -658,7 +702,7 @@ def init_genome_sequence(self): # but with much smaller contigs. The data are stored # using zarr as with the real data releases. - # Use real base composition. + # Use real AgamP4 base composition. base_composition = { b"a": 0.042154199245128525, b"c": 0.027760739796444212, @@ -675,8 +719,8 @@ def init_genome_sequence(self): self.genome = simulate_genome( path=path, contigs=self.contigs, - low=100_000, - high=150_000, + low=50_000, + high=100_000, base_composition=base_composition, ) self.contig_sizes = { @@ -799,19 +843,15 @@ def write_metadata(self, release, release_path, sample_set, aim=True, cohorts=Tr def init_metadata(self): self.write_metadata(release="3.0", release_path="v3", sample_set="AG1000G-AO") self.write_metadata(release="3.0", release_path="v3", sample_set="AG1000G-BF-A") - # Simulate situation where AIM and cohorts metadata are missing, - # do we correctly fill? self.write_metadata( release="3.1", release_path="v3.1", sample_set="1177-VO-ML-LEHMANN-VMF00004", - aim=False, - cohorts=False, ) def init_snp_sites(self): path = self.bucket_path / "v3/snp_genotypes/all/sites/" - self.n_sites = simulate_snp_sites( + self.snp_sites, self.n_snp_sites = simulate_snp_sites( path=path, contigs=self.contigs, genome=self.genome ) @@ -823,7 +863,7 @@ def init_site_filters(self): p_pass = 0.71 path = self.bucket_path / "v3/site_filters" / analysis / mask simulate_site_filters( - path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_snp_sites ) # Simulate the arab mask. 
@@ -831,7 +871,7 @@ def init_site_filters(self): p_pass = 0.70 path = self.bucket_path / "v3/site_filters" / analysis / mask simulate_site_filters( - path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_snp_sites ) # Simulate the gamb_colu_arab mask. @@ -839,7 +879,7 @@ def init_site_filters(self): p_pass = 0.62 path = self.bucket_path / "v3/site_filters" / analysis / mask simulate_site_filters( - path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_snp_sites ) def init_snp_genotypes(self): @@ -879,7 +919,7 @@ def init_snp_genotypes(self): zarr_path=zarr_path, metadata_path=metadata_path, contigs=self.contigs, - n_sites=self.n_sites, + n_sites=self.n_snp_sites, p_allele=p_allele, p_missing=p_missing, ) @@ -888,6 +928,151 @@ def init_site_annotations(self): path = self.bucket_path / self.config["SITE_ANNOTATIONS_ZARR_PATH"] simulate_site_annotations(path=path, genome=self.genome) + def init_hap_sites(self): + self.hap_sites = dict() + self.n_hap_sites = dict() + analysis = "arab" + path = self.bucket_path / "v3/snp_haplotypes/sites/" / analysis / "zarr" + self.hap_sites[analysis], self.n_hap_sites[analysis] = simulate_hap_sites( + path=path, + contigs=self.contigs, + snp_sites=self.snp_sites, + p_site=0.09, + ) + + analysis = "gamb_colu" + path = self.bucket_path / "v3/snp_haplotypes/sites/" / analysis / "zarr" + self.hap_sites[analysis], self.n_hap_sites[analysis] = simulate_hap_sites( + path=path, + contigs=self.contigs, + snp_sites=self.snp_sites, + p_site=0.28, + ) + + analysis = "gamb_colu_arab" + path = self.bucket_path / "v3/snp_haplotypes/sites/" / analysis / "zarr" + self.hap_sites[analysis], self.n_hap_sites[analysis] = simulate_hap_sites( + path=path, + contigs=self.contigs, + snp_sites=self.snp_sites, + p_site=0.25, + ) + + def init_haplotypes(self): + self.phasing_samples = dict() + for 
release, manifest in self.release_manifests.items(): + # Determine release path. + if release == "3.0": + release_path = "v3" + else: + release_path = f"v{release}" + + # Iterate over sample sets in the release. + for rec in manifest.itertuples(): + sample_set = rec.sample_set + + # Set up access to AIM metadata, to figure out which samples are in + # which analysis. + metadata_path = ( + self.bucket_path + / release_path + / "metadata" + / "species_calls_aim_20220528" + / sample_set + / "samples.species_aim.csv" + ) + df_aim = pd.read_csv(metadata_path) + + # Simulate haplotypes for the gamb_colu_arab analysis. + analysis = "gamb_colu_arab" + p_1 = 0.008 + samples = df_aim["sample_id"].values + self.phasing_samples[sample_set, analysis] = samples + n_samples = len(samples) + zarr_path = ( + self.bucket_path + / release_path + / "snp_haplotypes" + / sample_set + / analysis + / "zarr" + ) + root = zarr.open(zarr_path, mode="w") + root.create_dataset(name="samples", data=samples, dtype=str) + for contig in self.contigs: + n_sites = self.n_hap_sites[analysis][contig] + gt = np.random.choice( + np.array([0, 1], dtype="i1"), + size=(n_sites, n_samples, 2), + replace=True, + p=[1 - p_1, p_1], + ) + calldata = root.require_group(contig).require_group("calldata") + calldata.create_dataset(name="GT", data=gt) + zarr.consolidate_metadata(zarr_path) + + # Simulate haplotypes for the arab analysis. 
+ analysis = "arab" + p_1 = 0.06 + samples = df_aim.query("aim_species == 'arabiensis'")[ + "sample_id" + ].values + self.phasing_samples[sample_set, analysis] = samples + n_samples = len(samples) + if n_samples > 0: + zarr_path = ( + self.bucket_path + / release_path + / "snp_haplotypes" + / sample_set + / analysis + / "zarr" + ) + root = zarr.open(zarr_path, mode="w") + root.create_dataset(name="samples", data=samples, dtype=str) + for contig in self.contigs: + n_sites = self.n_hap_sites[analysis][contig] + gt = np.random.choice( + np.array([0, 1], dtype="i1"), + size=(n_sites, n_samples, 2), + replace=True, + p=[1 - p_1, p_1], + ) + calldata = root.require_group(contig).require_group("calldata") + calldata.create_dataset(name="GT", data=gt) + zarr.consolidate_metadata(zarr_path) + + # Simulate haplotypes for the gamb_colu analysis. + analysis = "gamb_colu" + p_1 = 0.01 + samples = df_aim.query( + "aim_species not in ['arabiensis', 'intermediate_gambcolu_arabiensis']" + )["sample_id"].values + self.phasing_samples[sample_set, analysis] = samples + n_samples = len(samples) + if n_samples > 0: + zarr_path = ( + self.bucket_path + / release_path + / "snp_haplotypes" + / sample_set + / analysis + / "zarr" + ) + root = zarr.open(zarr_path, mode="w") + root.create_dataset(name="samples", data=samples, dtype=str) + for contig in self.contigs: + n_sites = self.n_hap_sites[analysis][contig] + gt = np.random.choice( + np.array([0, 1], dtype="i1"), + size=(n_sites, n_samples, 2), + replace=True, + p=[1 - p_1, p_1], + ) + calldata = root.require_group(contig).require_group("calldata") + calldata.create_dataset(name="GT", data=gt) + zarr.consolidate_metadata(zarr_path) + class Af1Simulator(AnophelesSimulator): def __init__(self, fixture_dir): @@ -933,7 +1118,7 @@ def init_public_release_manifest(self): "1230-VO-GA-CF-AYALA-VMF00045", "1231-VO-MULTI-WONDJI-VMF00043", ], - "sample_count": [36, 50, 32], + "sample_count": [26, 40, 32], } ) manifest.to_csv(manifest_path, 
index=False, sep="\t") @@ -961,8 +1146,8 @@ def init_genome_sequence(self): self.genome = simulate_genome( path=path, contigs=self.contigs, - low=100_000, - high=200_000, + low=80_000, + high=120_000, base_composition=base_composition, ) self.contig_sizes = { @@ -1080,7 +1265,7 @@ def init_metadata(self): def init_snp_sites(self): path = self.bucket_path / "v1.0/snp_genotypes/all/sites/" - self.n_sites = simulate_snp_sites( + self.snp_sites, self.n_snp_sites = simulate_snp_sites( path=path, contigs=self.contigs, genome=self.genome ) @@ -1092,7 +1277,7 @@ def init_site_filters(self): p_pass = 0.59 path = self.bucket_path / "v1.0/site_filters" / analysis / mask simulate_site_filters( - path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_snp_sites ) def init_snp_genotypes(self): @@ -1129,7 +1314,7 @@ def init_snp_genotypes(self): zarr_path=zarr_path, metadata_path=metadata_path, contigs=self.contigs, - n_sites=self.n_sites, + n_sites=self.n_snp_sites, p_allele=p_allele, p_missing=p_missing, ) @@ -1138,6 +1323,68 @@ def init_site_annotations(self): path = self.bucket_path / self.config["SITE_ANNOTATIONS_ZARR_PATH"] simulate_site_annotations(path=path, genome=self.genome) + def init_hap_sites(self): + self.hap_sites = dict() + self.n_hap_sites = dict() + analysis = "funestus" + path = self.bucket_path / "v1.0/snp_haplotypes/sites/" / analysis / "zarr" + self.hap_sites[analysis], self.n_hap_sites[analysis] = simulate_hap_sites( + path=path, + contigs=self.contigs, + snp_sites=self.snp_sites, + p_site=np.random.random(), + ) + + def init_haplotypes(self): + self.phasing_samples = dict() + for release, manifest in self.release_manifests.items(): + # Determine release path. + release_path = f"v{release}" + + # Iterate over sample sets in the release. + for rec in manifest.itertuples(): + sample_set = rec.sample_set + + # Access sample metadata to find samples. 
+ metadata_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + df_samples = pd.read_csv(metadata_path) + samples = df_samples["sample_id"].values + + # Simulate haplotypes. + analysis = "funestus" + p_1 = np.random.random() + samples = df_samples["sample_id"].values + self.phasing_samples[sample_set, analysis] = samples + n_samples = len(samples) + zarr_path = ( + self.bucket_path + / release_path + / "snp_haplotypes" + / sample_set + / analysis + / "zarr" + ) + root = zarr.open(zarr_path, mode="w") + root.create_dataset(name="samples", data=samples, dtype=str) + for contig in self.contigs: + n_sites = self.n_hap_sites[analysis][contig] + gt = np.random.choice( + np.array([0, 1], dtype="i1"), + size=(n_sites, n_samples, 2), + replace=True, + p=[1 - p_1, p_1], + ) + calldata = root.require_group(contig).require_group("calldata") + calldata.create_dataset(name="GT", data=gt) + zarr.consolidate_metadata(zarr_path) + # For the following data fixtures we will use the "session" scope # so that the fixture data will be created only once per test diff --git a/tests/anoph/fixture/missing_metadata/config.json b/tests/anoph/fixture/missing_metadata/config.json new file mode 100644 index 000000000..1240d5c78 --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/config.json @@ -0,0 +1,7 @@ +{ + "PUBLIC_RELEASES": [ + "3.0" + ], + "DEFAULT_AIM_ANALYSIS": "20220528", + "DEFAULT_COHORTS_ANALYSIS": "20230223" +} diff --git a/tests/anoph/fixture/missing_metadata/v3/manifest.tsv b/tests/anoph/fixture/missing_metadata/v3/manifest.tsv new file mode 100644 index 000000000..a2adcb4e4 --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/manifest.tsv @@ -0,0 +1,3 @@ +sample_set sample_count +AG1000G-AO 81 +AG1000G-BF-A 181 diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.admin_units.csv 
b/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.admin_units.csv new file mode 100644 index 000000000..3344fe83f --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.admin_units.csv @@ -0,0 +1,82 @@ +sample_id,country,country_ISO,adm1_name,adm1_ISO,adm2_name +AR0047-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0049-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0051-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0061-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0078-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0080-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0084-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0097-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0072-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0094-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0095-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0083-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0093-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0021-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0082-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0008-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0085-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0098-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0092-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0017-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0015-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0019-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0100-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0034-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0086-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0057-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0076-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0042-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0063-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0012-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0087-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0065-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0038-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0089-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0071-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0096-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0088-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0066-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0023-C,Angola,AGO,Luanda,AO-LUA,Luanda 
+AR0020-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0024-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0014-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0079-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0027-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0075-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0077-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0007-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0062-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0060-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0022-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0002-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0059-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0048-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0011-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0009-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0043-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0035-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0074-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0045-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0073-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0004-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0040-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0052-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0064-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0044-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0036-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0001-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0006-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0046-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0070-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0010-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0090-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0054-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0016-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0050-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0069-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0018-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0081-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0013-Cx,Angola,AGO,Luanda,AO-LUA,Luanda +AR0026-C,Angola,AGO,Luanda,AO-LUA,Luanda +AR0053-C,Angola,AGO,Luanda,AO-LUA,Luanda diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.cohorts.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.cohorts.csv new file mode 
100644 index 000000000..ada4a8047 --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.cohorts.csv @@ -0,0 +1,82 @@ +sample_id,country_ISO,adm1_name,adm1_ISO,adm2_name,taxon,cohort_admin1_year,cohort_admin1_month,cohort_admin1_quarter,cohort_admin2_year,cohort_admin2_month,cohort_admin2_quarter +AR0047-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0049-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0051-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0061-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0078-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0080-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0084-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0097-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0072-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0094-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0095-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0083-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0093-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0021-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0082-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0008-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0085-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0098-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0092-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0017-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0015-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0019-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0100-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0034-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0086-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0057-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0076-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0042-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0063-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0012-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0087-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0065-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0038-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0089-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0071-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0096-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0088-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0066-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0023-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0020-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0024-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0014-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0079-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0027-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0075-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0077-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0007-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0062-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0060-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0022-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0002-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0059-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0048-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0011-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0009-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0043-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0035-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0074-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0045-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0073-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0004-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0040-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0052-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0064-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0044-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0036-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0001-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0006-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0046-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0070-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0010-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0090-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0054-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0016-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0050-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 
+AR0069-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0018-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0081-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0013-Cx,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0026-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 +AR0053-C,AGO,Luanda,AO-LUA,Luanda,coluzzii,AO-LUA_colu_2009,AO-LUA_colu_2009_04,AO-LUA_colu_2009_Q2,AO-LUA_Luanda_colu_2009,AO-LUA_Luanda_colu_2009_04,AO-LUA_Luanda_colu_2009_Q2 diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.taxa.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.taxa.csv new file mode 100644 index 000000000..2a84ec40f --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/cohorts_20230223/AG1000G-AO/samples.taxa.csv @@ -0,0 +1,82 @@ +sample_id,taxon +AR0047-C,coluzzii +AR0049-C,coluzzii +AR0051-C,coluzzii +AR0061-C,coluzzii +AR0078-C,coluzzii +AR0080-C,coluzzii +AR0084-C,coluzzii +AR0097-C,coluzzii +AR0072-C,coluzzii +AR0094-C,coluzzii +AR0095-C,coluzzii +AR0083-C,coluzzii +AR0093-C,coluzzii +AR0021-C,coluzzii +AR0082-C,coluzzii +AR0008-C,coluzzii +AR0085-C,coluzzii +AR0098-C,coluzzii +AR0092-C,coluzzii +AR0017-C,coluzzii +AR0015-C,coluzzii +AR0019-C,coluzzii +AR0100-C,coluzzii +AR0034-C,coluzzii +AR0086-C,coluzzii +AR0057-C,coluzzii +AR0076-C,coluzzii 
+AR0042-C,coluzzii +AR0063-C,coluzzii +AR0012-C,coluzzii +AR0087-C,coluzzii +AR0065-C,coluzzii +AR0038-C,coluzzii +AR0089-C,coluzzii +AR0071-C,coluzzii +AR0096-C,coluzzii +AR0088-C,coluzzii +AR0066-C,coluzzii +AR0023-C,coluzzii +AR0020-C,coluzzii +AR0024-C,coluzzii +AR0014-C,coluzzii +AR0079-C,coluzzii +AR0027-C,coluzzii +AR0075-C,coluzzii +AR0077-C,coluzzii +AR0007-C,coluzzii +AR0062-C,coluzzii +AR0060-C,coluzzii +AR0022-C,coluzzii +AR0002-C,coluzzii +AR0059-C,coluzzii +AR0048-C,coluzzii +AR0011-C,coluzzii +AR0009-C,coluzzii +AR0043-C,coluzzii +AR0035-C,coluzzii +AR0074-C,coluzzii +AR0045-C,coluzzii +AR0073-C,coluzzii +AR0004-C,coluzzii +AR0040-C,coluzzii +AR0052-C,coluzzii +AR0064-C,coluzzii +AR0044-C,coluzzii +AR0036-C,coluzzii +AR0001-C,coluzzii +AR0006-C,coluzzii +AR0046-C,coluzzii +AR0070-Cx,coluzzii +AR0010-Cx,coluzzii +AR0090-Cx,coluzzii +AR0054-Cx,coluzzii +AR0016-Cx,coluzzii +AR0050-Cx,coluzzii +AR0069-Cx,coluzzii +AR0018-Cx,coluzzii +AR0081-Cx,coluzzii +AR0013-Cx,coluzzii +AR0026-C,coluzzii +AR0053-C,coluzzii diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/samples.meta.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/samples.meta.csv new file mode 100644 index 000000000..8228c971c --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/samples.meta.csv @@ -0,0 +1,82 @@ +sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call +AR0047-C,LUA047,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0049-C,LUA049,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0051-C,LUA051,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0061-C,LUA061,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0078-C,LUA078,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0080-C,LUA080,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0084-C,LUA084,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0097-C,LUA097,Joao 
Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0072-C,LUA072,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0094-C,LUA094,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,M +AR0095-C,LUA095,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0083-C,LUA083,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0093-C,LUA093,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0021-C,LUA021,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0082-C,LUA082,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0008-C,LUA008,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0085-C,LUA085,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0098-C,LUA098,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0092-C,LUA092,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0017-C,LUA017,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0015-C,LUA015,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0019-C,LUA019,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0100-C,LUA100,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0034-C,LUA034,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0086-C,LUA086,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0057-C,LUA057,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0076-C,LUA076,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0042-C,LUA042,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0063-C,LUA063,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0012-C,LUA012,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0087-C,LUA087,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0065-C,LUA065,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0038-C,LUA038,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0089-C,LUA089,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0071-C,LUA071,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0096-C,LUA096,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0088-C,LUA088,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0066-C,LUA066,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F 
+AR0023-C,LUA023,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0020-C,LUA020,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0024-C,LUA024,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0014-C,LUA014,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0079-C,LUA079,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0027-C,LUA027,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0075-C,LUA075,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0077-C,LUA077,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0007-C,LUA007,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0062-C,LUA062,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0060-C,LUA060,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0022-C,LUA022,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0002-C,LUA002,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,M +AR0059-C,LUA059,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0048-C,LUA048,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0011-C,LUA011,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0009-C,LUA009,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0043-C,LUA043,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0035-C,LUA035,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0074-C,LUA074,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0045-C,LUA045,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0073-C,LUA073,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0004-C,LUA004,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,M +AR0040-C,LUA040,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0052-C,LUA052,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0064-C,LUA064,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0044-C,LUA044,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0036-C,LUA036,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0001-C,LUA001,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0006-C,LUA006,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,M +AR0046-C,LUA046,Joao 
Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0070-Cx,LUA070,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0010-Cx,LUA010,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0090-Cx,LUA090,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0054-Cx,LUA054,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0016-Cx,LUA016,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0050-Cx,LUA050,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0069-Cx,LUA069,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0018-Cx,LUA018,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0081-Cx,LUA081,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0013-Cx,LUA013,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0026-C,LUA026,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F +AR0053-C,LUA053,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/wgs_snp_data.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/wgs_snp_data.csv new file mode 100644 index 000000000..ecf5211ad --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-AO/wgs_snp_data.csv @@ -0,0 +1,82 @@ +sample_id,alignments_bam,alignments_bam_md5,snp_genotypes_vcf,snp_genotypes_vcf_md5,snp_genotypes_zarr,snp_genotypes_zarr_md5,pipeline_version,original_sample_id,seq_state,build_phase1,build_phase2,ebi_sample_acc,partner_sample_id,contributor,country,location,year,month,sample_set +AR0047-C,https://vo_agam_output.cog.sanger.ac.uk/AR0047-C.bam,5b55670b60759344a5e88801ada8d756,https://vo_agam_output.cog.sanger.ac.uk/AR0047-C.vcf.gz,39775ef5d71e42cfdecd94d8943d81e7,https://vo_agam_output.cog.sanger.ac.uk/AR0047-C.gatk.zarr.zip,a02c5f9962e7fb5491f2ea196026e87a,vr-pipe-vo-agam,AR0047,sequenced,True,True,ERS224005,LUA047,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0049-C,https://vo_agam_output.cog.sanger.ac.uk/AR0049-C.bam,f6590f73ba79cf24492dbbd07c019ded,https://vo_agam_output.cog.sanger.ac.uk/AR0049-C.vcf.gz,80dbe79f6f2863b213455053b66073b8,https://vo_agam_output.cog.sanger.ac.uk/AR0049-C.gatk.zarr.zip,0765ceaaf4f943c36e592bf1358deb52,vr-pipe-vo-agam,AR0049,sequenced,True,True,ERS224019,LUA049,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0051-C,https://vo_agam_output.cog.sanger.ac.uk/AR0051-C.bam,926b06ee5e9ee629100000803e458604,https://vo_agam_output.cog.sanger.ac.uk/AR0051-C.vcf.gz,d87d623c200f23db8bde94c8e2923f67,https://vo_agam_output.cog.sanger.ac.uk/AR0051-C.gatk.zarr.zip,73f1f4c0b2fadbe2cefa0c9ab7bdee01,vr-pipe-vo-agam,AR0051,sequenced,True,True,ERS224026,LUA051,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0061-C,https://vo_agam_output.cog.sanger.ac.uk/AR0061-C.bam,0810ecd5f9a77fb067443f8a62f6ec1a,https://vo_agam_output.cog.sanger.ac.uk/AR0061-C.vcf.gz,94d8e9ca2135ace9044260705b56a629,https://vo_agam_output.cog.sanger.ac.uk/AR0061-C.gatk.zarr.zip,353564cba53319451c296a1a039e9967,vr-pipe-vo-agam,AR0061,sequenced,True,True,ERS224033,LUA061,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0078-C,https://vo_agam_output.cog.sanger.ac.uk/AR0078-C.bam,846dcd10e24b384554f32fdb528f8ca1,https://vo_agam_output.cog.sanger.ac.uk/AR0078-C.vcf.gz,5ae280c8d88b455c06c1f3862d214fe8,https://vo_agam_output.cog.sanger.ac.uk/AR0078-C.gatk.zarr.zip,dcae9aade7413d1e81cec192c1bcd8aa,vr-pipe-vo-agam,AR0078,sequenced,True,True,ERS224040,LUA078,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0080-C,https://vo_agam_output.cog.sanger.ac.uk/AR0080-C.bam,b96ba5b2db1c4a7116bef0d5516931a7,https://vo_agam_output.cog.sanger.ac.uk/AR0080-C.vcf.gz,94ac7e79db9b0b1e1a06c7aac910d5b9,https://vo_agam_output.cog.sanger.ac.uk/AR0080-C.gatk.zarr.zip,14c25c388e8394f2eb64c4d798fd2d07,vr-pipe-vo-agam,AR0080,sequenced,True,True,ERS224047,LUA080,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0084-C,https://vo_agam_output.cog.sanger.ac.uk/AR0084-C.bam,1835c85df512168cf4c4fb8dc4dfebc3,https://vo_agam_output.cog.sanger.ac.uk/AR0084-C.vcf.gz,cd9272a72e1fe9c5f8e2f428d50e8393,https://vo_agam_output.cog.sanger.ac.uk/AR0084-C.gatk.zarr.zip,fa6b7aa90bf923c12c7e242317ea0afc,vr-pipe-vo-agam,AR0084,sequenced,True,True,ERS224054,LUA084,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0097-C,https://vo_agam_output.cog.sanger.ac.uk/AR0097-C.bam,c23f8bafc6120298187a813aae0f00cf,https://vo_agam_output.cog.sanger.ac.uk/AR0097-C.vcf.gz,2c0a869ea0f7b072f92331ac66a5a9bd,https://vo_agam_output.cog.sanger.ac.uk/AR0097-C.gatk.zarr.zip,40c861f3c1fffba8de68dfcead03d9bf,vr-pipe-vo-agam,AR0097,sequenced,False,True,ERS224061,LUA097,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0072-C,https://vo_agam_output.cog.sanger.ac.uk/AR0072-C.bam,a636559a26302e09ae6c8af359275d4e,https://vo_agam_output.cog.sanger.ac.uk/AR0072-C.vcf.gz,50c3782cd9dd92916afe4c7e696d31c5,https://vo_agam_output.cog.sanger.ac.uk/AR0072-C.gatk.zarr.zip,32b846974e14a0c63a475f7cffdf8ec8,vr-pipe-vo-agam,AR0072,sequenced,True,True,ERS224075,LUA072,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0094-C,https://vo_agam_output.cog.sanger.ac.uk/AR0094-C.bam,ce9dc28b801840ae24a6c11db8660c18,https://vo_agam_output.cog.sanger.ac.uk/AR0094-C.vcf.gz,b6de65aa1b3146918217b5a61511dd1d,https://vo_agam_output.cog.sanger.ac.uk/AR0094-C.gatk.zarr.zip,f2d9e1f0eabda04fae20e13b3dff460c,vr-pipe-vo-agam,AR0094,sequenced,True,True,ERS224082,LUA094,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0095-C,https://vo_agam_output.cog.sanger.ac.uk/AR0095-C.bam,12cbf8a8b02bcb055454945e6f35b1e6,https://vo_agam_output.cog.sanger.ac.uk/AR0095-C.vcf.gz,52a7cf8265a23ddbc84a76329e340bc0,https://vo_agam_output.cog.sanger.ac.uk/AR0095-C.gatk.zarr.zip,6cdb5e6cd535819a5f62be17a92999cf,vr-pipe-vo-agam,AR0095,sequenced,True,True,ERS224097,LUA095,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0083-C,https://vo_agam_output.cog.sanger.ac.uk/AR0083-C.bam,b38dcb0c8776ec834c05af7757fe7d3d,https://vo_agam_output.cog.sanger.ac.uk/AR0083-C.vcf.gz,c07615738803927b6312984682003dd2,https://vo_agam_output.cog.sanger.ac.uk/AR0083-C.gatk.zarr.zip,b54ece2573c9d99784c249755c6eec72,vr-pipe-vo-agam,AR0083,sequenced,True,True,ERS224120,LUA083,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0093-C,https://vo_agam_output.cog.sanger.ac.uk/AR0093-C.bam,8fd48e2017c31db228de73de9593c5bc,https://vo_agam_output.cog.sanger.ac.uk/AR0093-C.vcf.gz,092ff5b323ad2f1543ae7f2bb5cf0f4a,https://vo_agam_output.cog.sanger.ac.uk/AR0093-C.gatk.zarr.zip,0ce4435aeb6bd4b52e17355e861878e3,vr-pipe-vo-agam,AR0093,sequenced,True,True,ERS224121,LUA093,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0021-C,https://vo_agam_output.cog.sanger.ac.uk/AR0021-C.bam,42c7edb58ff40366dd5503892bb77ccb,https://vo_agam_output.cog.sanger.ac.uk/AR0021-C.vcf.gz,4607512a0f1904538663b56e506818af,https://vo_agam_output.cog.sanger.ac.uk/AR0021-C.gatk.zarr.zip,ed38f22b8d5c63926c6fd58334dd8f74,vr-pipe-vo-agam,AR0021,sequenced,True,True,ERS224126,LUA021,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0082-C,https://vo_agam_output.cog.sanger.ac.uk/AR0082-C.bam,54c8e74223a529cbcf74d8db3eafdfc1,https://vo_agam_output.cog.sanger.ac.uk/AR0082-C.vcf.gz,587df0b593f8a27b1c40c7302cd1caef,https://vo_agam_output.cog.sanger.ac.uk/AR0082-C.gatk.zarr.zip,7fb48188009746a2830410f392a22784,vr-pipe-vo-agam,AR0082,sequenced,False,True,ERS224128,LUA082,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0008-C,https://vo_agam_output.cog.sanger.ac.uk/AR0008-C.bam,35739a5676b113bbba5ecb60a8fa5098,https://vo_agam_output.cog.sanger.ac.uk/AR0008-C.vcf.gz,9e2006bbd9ed2e009b47549af36662c5,https://vo_agam_output.cog.sanger.ac.uk/AR0008-C.gatk.zarr.zip,1f1243dcc96b3df786a623f9736cf11b,vr-pipe-vo-agam,AR0008,sequenced,True,True,ERS224129,LUA008,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0085-C,https://vo_agam_output.cog.sanger.ac.uk/AR0085-C.bam,bdd6f36b8858325fa538c03474aa5d25,https://vo_agam_output.cog.sanger.ac.uk/AR0085-C.vcf.gz,354d78c15025fc1916ac5f125e7357f8,https://vo_agam_output.cog.sanger.ac.uk/AR0085-C.gatk.zarr.zip,e2e7da59a5f0c2cc4856cbf63382091c,vr-pipe-vo-agam,AR0085,sequenced,False,True,ERS224164,LUA085,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0098-C,https://vo_agam_output.cog.sanger.ac.uk/AR0098-C.bam,b7a35bbfefc7812e0109d41957451169,https://vo_agam_output.cog.sanger.ac.uk/AR0098-C.vcf.gz,3ced26e2b38503195637749cd4fce07a,https://vo_agam_output.cog.sanger.ac.uk/AR0098-C.gatk.zarr.zip,b1caf2442ade7279e229175feb70ff08,vr-pipe-vo-agam,AR0098,sequenced,True,True,ERS224098,LUA098,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0092-C,https://vo_agam_output.cog.sanger.ac.uk/AR0092-C.bam,6d92b6e46c8d22bbb86e634ec4fee992,https://vo_agam_output.cog.sanger.ac.uk/AR0092-C.vcf.gz,bd138a28507265a693795d2e3810d7d0,https://vo_agam_output.cog.sanger.ac.uk/AR0092-C.gatk.zarr.zip,6945da0336b0ac8716f8d168b520be6b,vr-pipe-vo-agam,AR0092,sequenced,True,True,ERS224123,LUA092,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0017-C,https://vo_agam_output.cog.sanger.ac.uk/AR0017-C.bam,118a6ff9ed9254c6d1309f434a9d83fe,https://vo_agam_output.cog.sanger.ac.uk/AR0017-C.vcf.gz,6717445d7d4f25036e0c8e696656d038,https://vo_agam_output.cog.sanger.ac.uk/AR0017-C.gatk.zarr.zip,cff8b770ed6ee033696ee7c63a52b7dd,vr-pipe-vo-agam,AR0017,sequenced,True,True,ERS224124,LUA017,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0015-C,https://vo_agam_output.cog.sanger.ac.uk/AR0015-C.bam,4871a4309177c498802aa4033a520b27,https://vo_agam_output.cog.sanger.ac.uk/AR0015-C.vcf.gz,f0f909d8496e146e41cd54f55024fd0c,https://vo_agam_output.cog.sanger.ac.uk/AR0015-C.gatk.zarr.zip,a3f1ca1a0c50b8393555a77dd807103c,vr-pipe-vo-agam,AR0015,sequenced,True,True,ERS224131,LUA015,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0019-C,https://vo_agam_output.cog.sanger.ac.uk/AR0019-C.bam,7118b077c4fcc82c78e9010ffd6f5867,https://vo_agam_output.cog.sanger.ac.uk/AR0019-C.vcf.gz,94bc84e7b1ea2d7fb493bdf182b9663f,https://vo_agam_output.cog.sanger.ac.uk/AR0019-C.gatk.zarr.zip,d88b140b9f9c8b6a9f57145efacab0a3,vr-pipe-vo-agam,AR0019,sequenced,True,True,ERS224138,LUA019,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0100-C,https://vo_agam_output.cog.sanger.ac.uk/AR0100-C.bam,3a48d6d91c316b0e9e34adf0b181cfa2,https://vo_agam_output.cog.sanger.ac.uk/AR0100-C.vcf.gz,d45fb6335ffb7fb853548b08a55d321d,https://vo_agam_output.cog.sanger.ac.uk/AR0100-C.gatk.zarr.zip,2ed294ad4a9338bec907fd02538e968e,vr-pipe-vo-agam,AR0100,sequenced,True,True,ERS224144,LUA100,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0034-C,https://vo_agam_output.cog.sanger.ac.uk/AR0034-C.bam,a693923a43965b7f7027b734421edc7a,https://vo_agam_output.cog.sanger.ac.uk/AR0034-C.vcf.gz,9647aa9f6a96a8340d15c71ea01ed59c,https://vo_agam_output.cog.sanger.ac.uk/AR0034-C.gatk.zarr.zip,bf8e4fd04abe3ffe37f8debbbe1a06ec,vr-pipe-vo-agam,AR0034,sequenced,True,True,ERS224145,LUA034,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0086-C,https://vo_agam_output.cog.sanger.ac.uk/AR0086-C.bam,18b7fb517ffafdeaa266d6cbaf2c896e,https://vo_agam_output.cog.sanger.ac.uk/AR0086-C.vcf.gz,e1c419b1996a524e66e109d8617b0ef5,https://vo_agam_output.cog.sanger.ac.uk/AR0086-C.gatk.zarr.zip,d61843e28acc0193ff44e903244a62a5,vr-pipe-vo-agam,AR0086,sequenced,True,True,ERS224152,LUA086,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0057-C,https://vo_agam_output.cog.sanger.ac.uk/AR0057-C.bam,44225626e17a15b4fbe1b6b316a075bc,https://vo_agam_output.cog.sanger.ac.uk/AR0057-C.vcf.gz,8bbbbf0148580118352329dbfebd117e,https://vo_agam_output.cog.sanger.ac.uk/AR0057-C.gatk.zarr.zip,d316be2fb413619793bd0fee6ff03eeb,vr-pipe-vo-agam,AR0057,sequenced,True,True,ERS224175,LUA057,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0076-C,https://vo_agam_output.cog.sanger.ac.uk/AR0076-C.bam,0320ea12a74a5a6c3f1de331bfccfc2d,https://vo_agam_output.cog.sanger.ac.uk/AR0076-C.vcf.gz,db3a7701becef1cd940c2e123092fe72,https://vo_agam_output.cog.sanger.ac.uk/AR0076-C.gatk.zarr.zip,77669e5cc6b9841827bd35831bc461c8,vr-pipe-vo-agam,AR0076,sequenced,True,True,ERS224176,LUA076,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0042-C,https://vo_agam_output.cog.sanger.ac.uk/AR0042-C.bam,d9591e908bbc17cc50a80e8a185a52d3,https://vo_agam_output.cog.sanger.ac.uk/AR0042-C.vcf.gz,317ef58393e1fb134e7f9e6e7558aac6,https://vo_agam_output.cog.sanger.ac.uk/AR0042-C.gatk.zarr.zip,e40d7fc0f9bd109c62b9cec136c64170,vr-pipe-vo-agam,AR0042,sequenced,True,True,ERS224189,LUA042,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0063-C,https://vo_agam_output.cog.sanger.ac.uk/AR0063-C.bam,c971747ef608f365298cd623669de7ea,https://vo_agam_output.cog.sanger.ac.uk/AR0063-C.vcf.gz,a124356819c11df7ef57e07ff6bdcd4d,https://vo_agam_output.cog.sanger.ac.uk/AR0063-C.gatk.zarr.zip,e09336e6dd939914d5cc2254ee653956,vr-pipe-vo-agam,AR0063,sequenced,True,True,ERS224196,LUA063,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0012-C,https://vo_agam_output.cog.sanger.ac.uk/AR0012-C.bam,b8b7b788a3d8ecd7c7fca7cda9e952d7,https://vo_agam_output.cog.sanger.ac.uk/AR0012-C.vcf.gz,b8d5dba7459ad487df21e13071d1c287,https://vo_agam_output.cog.sanger.ac.uk/AR0012-C.gatk.zarr.zip,f52e06a7bbb04f6876f7ae4f3571ba52,vr-pipe-vo-agam,AR0012,sequenced,True,True,ERS224230,LUA012,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0087-C,https://vo_agam_output.cog.sanger.ac.uk/AR0087-C.bam,5941873ef551eb2e3bccc9f1539f0dd7,https://vo_agam_output.cog.sanger.ac.uk/AR0087-C.vcf.gz,512aaf9ad6a92e7362d719cf4a04b194,https://vo_agam_output.cog.sanger.ac.uk/AR0087-C.gatk.zarr.zip,a65f9d8329a3f94ac65a19d95ebdb16d,vr-pipe-vo-agam,AR0087,sequenced,True,True,ERS224231,LUA087,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0065-C,https://vo_agam_output.cog.sanger.ac.uk/AR0065-C.bam,0ad5eb33a281d10cce16a36a190b4437,https://vo_agam_output.cog.sanger.ac.uk/AR0065-C.vcf.gz,d1824a320dcf89f6139ed3e72255889f,https://vo_agam_output.cog.sanger.ac.uk/AR0065-C.gatk.zarr.zip,5a10cd9db82a74ffd18773a040cbf19a,vr-pipe-vo-agam,AR0065,sequenced,True,True,ERS224237,LUA065,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0038-C,https://vo_agam_output.cog.sanger.ac.uk/AR0038-C.bam,51a25f6daf85676f1ba9bcf51d1653bd,https://vo_agam_output.cog.sanger.ac.uk/AR0038-C.vcf.gz,fac7a6810617867e99a69a7b73d64355,https://vo_agam_output.cog.sanger.ac.uk/AR0038-C.gatk.zarr.zip,9c89f82f769786fe6e906f3e33e27344,vr-pipe-vo-agam,AR0038,sequenced,False,True,ERS224172,LUA038,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0089-C,https://vo_agam_output.cog.sanger.ac.uk/AR0089-C.bam,ff5c7296cb92ff83e592ef8d6f792a47,https://vo_agam_output.cog.sanger.ac.uk/AR0089-C.vcf.gz,65d96a9e4d3792022d938ae4343e0af8,https://vo_agam_output.cog.sanger.ac.uk/AR0089-C.gatk.zarr.zip,051e91962974a1e1c0f38fa842d0f9d7,vr-pipe-vo-agam,AR0089,sequenced,True,True,ERS224186,LUA089,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0071-C,https://vo_agam_output.cog.sanger.ac.uk/AR0071-C.bam,e51da72a69021914e717ac205d32d759,https://vo_agam_output.cog.sanger.ac.uk/AR0071-C.vcf.gz,cad8c095e4fb20c85027ac582864fe1f,https://vo_agam_output.cog.sanger.ac.uk/AR0071-C.gatk.zarr.zip,64afd6a6b7607c20bd36370bb1fefb48,vr-pipe-vo-agam,AR0071,sequenced,True,True,ERS224194,LUA071,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0096-C,https://vo_agam_output.cog.sanger.ac.uk/AR0096-C.bam,5dc4109dc2bd050d21d5eb2893b211a5,https://vo_agam_output.cog.sanger.ac.uk/AR0096-C.vcf.gz,ad65e44ff15aebfc92bbb855a4c1fecd,https://vo_agam_output.cog.sanger.ac.uk/AR0096-C.gatk.zarr.zip,32a2553af9bccee1adb622baad05fd47,vr-pipe-vo-agam,AR0096,sequenced,True,True,ERS224201,LUA096,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0088-C,https://vo_agam_output.cog.sanger.ac.uk/AR0088-C.bam,4f581d1d4793b7291d7a3829aefafa6b,https://vo_agam_output.cog.sanger.ac.uk/AR0088-C.vcf.gz,b5ade33493062c2a848965bf917844eb,https://vo_agam_output.cog.sanger.ac.uk/AR0088-C.gatk.zarr.zip,8a20cb1a24d7ea1e1f2d7b3e3cfc3327,vr-pipe-vo-agam,AR0088,sequenced,False,True,ERS224215,LUA088,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0066-C,https://vo_agam_output.cog.sanger.ac.uk/AR0066-C.bam,7bb07b684ba1e0d842e3ddfb3ed89f9e,https://vo_agam_output.cog.sanger.ac.uk/AR0066-C.vcf.gz,10cf9f769ca018f3262544eff0e427ff,https://vo_agam_output.cog.sanger.ac.uk/AR0066-C.gatk.zarr.zip,b64a7cdab2f9ca3630b409f083c1507f,vr-pipe-vo-agam,AR0066,sequenced,True,True,ERS224236,LUA066,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0023-C,https://vo_agam_output.cog.sanger.ac.uk/AR0023-C.bam,255653109f257ea2aa1bca0dab55346c,https://vo_agam_output.cog.sanger.ac.uk/AR0023-C.vcf.gz,c46d7bf8008084d38b5c1e0693954acb,https://vo_agam_output.cog.sanger.ac.uk/AR0023-C.gatk.zarr.zip,397035d3b1e5d97fa675d2eace862d24,vr-pipe-vo-agam,AR0023,sequenced,True,True,ERS224251,LUA023,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0020-C,https://vo_agam_output.cog.sanger.ac.uk/AR0020-C.bam,a1ed904286728d120c2c1c51fa87d4df,https://vo_agam_output.cog.sanger.ac.uk/AR0020-C.vcf.gz,72fec4771406f30b327823e9656f4cfc,https://vo_agam_output.cog.sanger.ac.uk/AR0020-C.gatk.zarr.zip,508c02165e1102a09eb983ad509b94c7,vr-pipe-vo-agam,AR0020,sequenced,True,True,ERS224267,LUA020,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0024-C,https://vo_agam_output.cog.sanger.ac.uk/AR0024-C.bam,70eb2e190831d5caf790be0541b34743,https://vo_agam_output.cog.sanger.ac.uk/AR0024-C.vcf.gz,9d0e56b1e54e2e0d24b6ebb9acac1122,https://vo_agam_output.cog.sanger.ac.uk/AR0024-C.gatk.zarr.zip,61f08a56970b2c52d289d340a14b5651,vr-pipe-vo-agam,AR0024,sequenced,True,True,ERS224280,LUA024,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0014-C,https://vo_agam_output.cog.sanger.ac.uk/AR0014-C.bam,3e8fe1e66d1a6e1884048119f740d51e,https://vo_agam_output.cog.sanger.ac.uk/AR0014-C.vcf.gz,33dec2a5a75b615c2aa0e398ffbf4d7f,https://vo_agam_output.cog.sanger.ac.uk/AR0014-C.gatk.zarr.zip,5c7938c24f2413a40ed37a77b15e6f7b,vr-pipe-vo-agam,AR0014,sequenced,True,True,ERS224282,LUA014,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0079-C,https://vo_agam_output.cog.sanger.ac.uk/AR0079-C.bam,ef5f83fc14930de5299d41ff92487d35,https://vo_agam_output.cog.sanger.ac.uk/AR0079-C.vcf.gz,798d0e32d49298b7f9bc60cd001ca0c3,https://vo_agam_output.cog.sanger.ac.uk/AR0079-C.gatk.zarr.zip,37b80dfc69edb2728436a0dc554f949b,vr-pipe-vo-agam,AR0079,sequenced,True,True,ERS224321,LUA079,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0027-C,https://vo_agam_output.cog.sanger.ac.uk/AR0027-C.bam,c6271aaa7fcf43bb96d7814d7fc6c5bf,https://vo_agam_output.cog.sanger.ac.uk/AR0027-C.vcf.gz,208a448bddbba05354a827cbc8ea348f,https://vo_agam_output.cog.sanger.ac.uk/AR0027-C.gatk.zarr.zip,9a1f6a0499ec8ccb2e7f883565c789a3,vr-pipe-vo-agam,AR0027,sequenced,True,True,ERS224329,LUA027,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0075-C,https://vo_agam_output.cog.sanger.ac.uk/AR0075-C.bam,121cb4b05e92d16dcfce768b937f393d,https://vo_agam_output.cog.sanger.ac.uk/AR0075-C.vcf.gz,98ad19e37be5bb8bc8a0b6217c13ba87,https://vo_agam_output.cog.sanger.ac.uk/AR0075-C.gatk.zarr.zip,28331e00d674657a3e7552d19a6c8660,vr-pipe-vo-agam,AR0075,sequenced,True,True,ERS224330,LUA075,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0077-C,https://vo_agam_output.cog.sanger.ac.uk/AR0077-C.bam,9bedc05b5cb6c69ca4ac3de75e39a5c9,https://vo_agam_output.cog.sanger.ac.uk/AR0077-C.vcf.gz,06674815baba737d131e815bcbf6cc5e,https://vo_agam_output.cog.sanger.ac.uk/AR0077-C.gatk.zarr.zip,8f14fe86c7f3f701c7986853787fd98f,vr-pipe-vo-agam,AR0077,sequenced,False,True,ERS224255,LUA077,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0007-C,https://vo_agam_output.cog.sanger.ac.uk/AR0007-C.bam,ea18855e1c67b5848a15d9aebd4886ef,https://vo_agam_output.cog.sanger.ac.uk/AR0007-C.vcf.gz,0765d752ab1dc7412bd7d9a95b220b1d,https://vo_agam_output.cog.sanger.ac.uk/AR0007-C.gatk.zarr.zip,8fffa95a350e19b5d4b4379e0720d4c4,vr-pipe-vo-agam,AR0007,sequenced,True,True,ERS224256,LUA007,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0062-C,https://vo_agam_output.cog.sanger.ac.uk/AR0062-C.bam,bdbe4aee18ccb6df434530a0327e78ac,https://vo_agam_output.cog.sanger.ac.uk/AR0062-C.vcf.gz,6d6993c18dd01b6fa890ab643ff7e5d0,https://vo_agam_output.cog.sanger.ac.uk/AR0062-C.gatk.zarr.zip,940148c6f73b189dcc8e750ddb6fee47,vr-pipe-vo-agam,AR0062,sequenced,True,True,ERS224257,LUA062,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0060-C,https://vo_agam_output.cog.sanger.ac.uk/AR0060-C.bam,427a0e486016173be704c1b1961821bb,https://vo_agam_output.cog.sanger.ac.uk/AR0060-C.vcf.gz,e590858cf84f4d9f05a0eb24668ff5f9,https://vo_agam_output.cog.sanger.ac.uk/AR0060-C.gatk.zarr.zip,a07772044d394299be291eeed9723339,vr-pipe-vo-agam,AR0060,sequenced,False,True,ERS224269,LUA060,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0022-C,https://vo_agam_output.cog.sanger.ac.uk/AR0022-C.bam,95fc4883f88f06ece4df150f84e62b5f,https://vo_agam_output.cog.sanger.ac.uk/AR0022-C.vcf.gz,42d667c02775f46be6525b76bf76fdb6,https://vo_agam_output.cog.sanger.ac.uk/AR0022-C.gatk.zarr.zip,fbc45c0d25005c5d339e6ebc3156ecdc,vr-pipe-vo-agam,AR0022,sequenced,True,True,ERS224289,LUA022,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0002-C,https://vo_agam_output.cog.sanger.ac.uk/AR0002-C.bam,50e3259364b761a64b019948cc8bfbcd,https://vo_agam_output.cog.sanger.ac.uk/AR0002-C.vcf.gz,837999991a46a0aef6024706d8673337,https://vo_agam_output.cog.sanger.ac.uk/AR0002-C.gatk.zarr.zip,2710b2b38b59f5e234c270053d4adeb8,vr-pipe-vo-agam,AR0002,sequenced,False,True,ERS224290,LUA002,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0059-C,https://vo_agam_output.cog.sanger.ac.uk/AR0059-C.bam,e54ccf05dd0c77ac3df280a071635b38,https://vo_agam_output.cog.sanger.ac.uk/AR0059-C.vcf.gz,beb5e7394e12ecd8b0f0c71fba65672c,https://vo_agam_output.cog.sanger.ac.uk/AR0059-C.gatk.zarr.zip,7786005a706186935aee6a8c76d92eb5,vr-pipe-vo-agam,AR0059,sequenced,True,True,ERS224297,LUA059,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0048-C,https://vo_agam_output.cog.sanger.ac.uk/AR0048-C.bam,dd073b38cf541c762227b555f4bf2cab,https://vo_agam_output.cog.sanger.ac.uk/AR0048-C.vcf.gz,b47b13051ba5d0485d2aef552f3f7ec8,https://vo_agam_output.cog.sanger.ac.uk/AR0048-C.gatk.zarr.zip,d0d6714120727d17bbf6f4d7a1fb5021,vr-pipe-vo-agam,AR0048,sequenced,False,True,ERS224299,LUA048,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0011-C,https://vo_agam_output.cog.sanger.ac.uk/AR0011-C.bam,ee1a0e62216ee27282e1126f7a1bedad,https://vo_agam_output.cog.sanger.ac.uk/AR0011-C.vcf.gz,a216a078c130a10dc8cd96f5473c52f4,https://vo_agam_output.cog.sanger.ac.uk/AR0011-C.gatk.zarr.zip,341cd85c4e6e70f651eebd90b8912c8f,vr-pipe-vo-agam,AR0011,sequenced,True,True,ERS224318,LUA011,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0009-C,https://vo_agam_output.cog.sanger.ac.uk/AR0009-C.bam,f3e8247292256203dad7402a8f0ed6f7,https://vo_agam_output.cog.sanger.ac.uk/AR0009-C.vcf.gz,9bf597464382384148b9de5f5c4272ea,https://vo_agam_output.cog.sanger.ac.uk/AR0009-C.gatk.zarr.zip,b3fd4fe4e57ad326b025abae586fc631,vr-pipe-vo-agam,AR0009,sequenced,True,True,ERS224325,LUA009,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0043-C,https://vo_agam_output.cog.sanger.ac.uk/AR0043-C.bam,1169018b1f9db1f4284ad9346636f207,https://vo_agam_output.cog.sanger.ac.uk/AR0043-C.vcf.gz,f978dad6d9a015989d79bde3a3a2f3f1,https://vo_agam_output.cog.sanger.ac.uk/AR0043-C.gatk.zarr.zip,0cd44def220a6615b30500d5726eedbb,vr-pipe-vo-agam,AR0043,sequenced,True,True,ERS224331,LUA043,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0035-C,https://vo_agam_output.cog.sanger.ac.uk/AR0035-C.bam,9fb221a444cbd9ddeb42eeb739a64454,https://vo_agam_output.cog.sanger.ac.uk/AR0035-C.vcf.gz,ceb09454f71d0028d9aeab80dd5e7bb1,https://vo_agam_output.cog.sanger.ac.uk/AR0035-C.gatk.zarr.zip,99c32824fc3c5245b9df6c24ddf761d2,vr-pipe-vo-agam,AR0035,sequenced,True,True,ERS224333,LUA035,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0074-C,https://vo_agam_output.cog.sanger.ac.uk/AR0074-C.bam,0bfb9626fca3f49d8a0d3561d9da0766,https://vo_agam_output.cog.sanger.ac.uk/AR0074-C.vcf.gz,029366842b30fb70e8b47f45a156e6f3,https://vo_agam_output.cog.sanger.ac.uk/AR0074-C.gatk.zarr.zip,6137fcce3cf822f1165696b7604b6322,vr-pipe-vo-agam,AR0074,sequenced,True,True,ERS224306,LUA074,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0045-C,https://vo_agam_output.cog.sanger.ac.uk/AR0045-C.bam,5eb49c1386debd13bdf4b7d84c8014a1,https://vo_agam_output.cog.sanger.ac.uk/AR0045-C.vcf.gz,137d584321aa3770ce0ecee99e8e0cd5,https://vo_agam_output.cog.sanger.ac.uk/AR0045-C.gatk.zarr.zip,1a9098de433ae63b49b1a01af8af1cbe,vr-pipe-vo-agam,AR0045,sequenced,True,True,ERS224327,LUA045,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0073-C,https://vo_agam_output.cog.sanger.ac.uk/AR0073-C.bam,443fece7b29c23e34463c0f0b3bab919,https://vo_agam_output.cog.sanger.ac.uk/AR0073-C.vcf.gz,f984e757c73b0b9125abc75e87bad713,https://vo_agam_output.cog.sanger.ac.uk/AR0073-C.gatk.zarr.zip,f218e62aca8272d431ba3b0aad917265,vr-pipe-vo-agam,AR0073,sequenced,True,True,ERS224791,LUA073,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0004-C,https://vo_agam_output.cog.sanger.ac.uk/AR0004-C.bam,ad4ba68f822287a8f517fabab807a38f,https://vo_agam_output.cog.sanger.ac.uk/AR0004-C.vcf.gz,2ae005f73ce86533b6ed21dbe01df82c,https://vo_agam_output.cog.sanger.ac.uk/AR0004-C.gatk.zarr.zip,b3eb496504517d565f3f359f0fc199c7,vr-pipe-vo-agam,AR0004,sequenced,True,True,ERS224792,LUA004,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0040-C,https://vo_agam_output.cog.sanger.ac.uk/AR0040-C.bam,d10999b1d5c966d5c8b456394d2a3516,https://vo_agam_output.cog.sanger.ac.uk/AR0040-C.vcf.gz,10f7a32d0274521e5b51a99c2951d172,https://vo_agam_output.cog.sanger.ac.uk/AR0040-C.gatk.zarr.zip,c917b10bfcaffd6020e772cd74e45483,vr-pipe-vo-agam,AR0040,sequenced,False,True,ERS224793,LUA040,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0052-C,https://vo_agam_output.cog.sanger.ac.uk/AR0052-C.bam,7333739c67e1a19ea8a27751b013d90b,https://vo_agam_output.cog.sanger.ac.uk/AR0052-C.vcf.gz,93286f2cf4ccb94c04acf19e18be6159,https://vo_agam_output.cog.sanger.ac.uk/AR0052-C.gatk.zarr.zip,2de3cf7bd7ca0ab7a89c80bca55162f9,vr-pipe-vo-agam,AR0052,sequenced,False,True,ERS224794,LUA052,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0064-C,https://vo_agam_output.cog.sanger.ac.uk/AR0064-C.bam,4e841a040a067e9d656dc383e18e2a7c,https://vo_agam_output.cog.sanger.ac.uk/AR0064-C.vcf.gz,e88eaf5962232f3536f4258cb6c1de9f,https://vo_agam_output.cog.sanger.ac.uk/AR0064-C.gatk.zarr.zip,c07205c67f0318c354a1a7ea0e259ff5,vr-pipe-vo-agam,AR0064,sequenced,False,True,ERS224795,LUA064,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0044-C,https://vo_agam_output.cog.sanger.ac.uk/AR0044-C.bam,a6ba37b1ec3a7e3795d3a668ea95e10c,https://vo_agam_output.cog.sanger.ac.uk/AR0044-C.vcf.gz,2ef082d3954d96fc9f739ce5423cb484,https://vo_agam_output.cog.sanger.ac.uk/AR0044-C.gatk.zarr.zip,ae7c9b42bcbce97db07c8b4aae2379ea,vr-pipe-vo-agam,AR0044,sequenced,False,True,ERS224797,LUA044,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0036-C,https://vo_agam_output.cog.sanger.ac.uk/AR0036-C.bam,a0a5f3e382b0fa5c631a96686460b159,https://vo_agam_output.cog.sanger.ac.uk/AR0036-C.vcf.gz,7d36225efcd7c73f25e1cce4e419261b,https://vo_agam_output.cog.sanger.ac.uk/AR0036-C.gatk.zarr.zip,9bd8679c06cfaa028640526d6c4e995a,vr-pipe-vo-agam,AR0036,sequenced,False,True,ERS224799,LUA036,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0001-C,https://vo_agam_output.cog.sanger.ac.uk/AR0001-C.bam,34b9e0aa7f48b08f2cce393e94d7ffd5,https://vo_agam_output.cog.sanger.ac.uk/AR0001-C.vcf.gz,a43f2f836ea6dfc73f6a66f4de858484,https://vo_agam_output.cog.sanger.ac.uk/AR0001-C.gatk.zarr.zip,5afbe24bcef72bb8b505e9b3533196a6,vr-pipe-vo-agam,AR0001,sequenced,False,True,ERS224790,LUA001,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0006-C,https://vo_agam_output.cog.sanger.ac.uk/AR0006-C.bam,02048669f28c542d7486eb4f52acc447,https://vo_agam_output.cog.sanger.ac.uk/AR0006-C.vcf.gz,13527e9a4b649c69acd036f3904714a3,https://vo_agam_output.cog.sanger.ac.uk/AR0006-C.gatk.zarr.zip,f8a47ed8dc74e9aece44e3c3e9b99ae3,vr-pipe-vo-agam,AR0006,sequenced,False,True,ERS224796,LUA006,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0046-C,https://vo_agam_output.cog.sanger.ac.uk/AR0046-C.bam,5169cc457a47b5569a5c7edb19c62a28,https://vo_agam_output.cog.sanger.ac.uk/AR0046-C.vcf.gz,def02089d88b2ac790cb73cd33fe8efa,https://vo_agam_output.cog.sanger.ac.uk/AR0046-C.gatk.zarr.zip,307ab946653bd3b2c6d398ac70a6ab8b,vr-pipe-vo-agam,AR0046,sequenced,False,True,ERS224798,LUA046,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0070-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0070-Cx.bam,d14a358adc235c701a0dd46cdb00116c,https://vo_agam_output.cog.sanger.ac.uk/AR0070-Cx.vcf.gz,821c45c40871d656afbc101a25a90d22,https://vo_agam_output.cog.sanger.ac.uk/AR0070-Cx.gatk.zarr.zip,54be7173685501e13275d463ff5c2b58,vr-pipe-vo-agam,AR0070,sequenced,False,False,ERS224088,LUA070,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0010-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0010-Cx.bam,8e473041ae0fa84c2f5edb0db1311173,https://vo_agam_output.cog.sanger.ac.uk/AR0010-Cx.vcf.gz,7eec87b3d8bc6720eccc6a2238a9e551,https://vo_agam_output.cog.sanger.ac.uk/AR0010-Cx.gatk.zarr.zip,da939bf47ee802283e3dd4f4a13a8253,vr-pipe-vo-agam,AR0010,sequenced,False,False,ERS224099,LUA010,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0090-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0090-Cx.bam,55b5af66ad8704337696a7871ef2206b,https://vo_agam_output.cog.sanger.ac.uk/AR0090-Cx.vcf.gz,02157eeed20600ccfa33b429cb36ce67,https://vo_agam_output.cog.sanger.ac.uk/AR0090-Cx.gatk.zarr.zip,da519fe1ee6b710bf65e73d0fd75cbb7,vr-pipe-vo-agam,AR0090,sequenced,False,False,ERS224106,LUA090,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0054-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0054-Cx.bam,deef7b8446302abe7627814f5bc79c29,https://vo_agam_output.cog.sanger.ac.uk/AR0054-Cx.vcf.gz,24ebdf41a844a39b546495a41db71417,https://vo_agam_output.cog.sanger.ac.uk/AR0054-Cx.gatk.zarr.zip,f567dca5d621d6af0e64b39ffd67bf28,vr-pipe-vo-agam,AR0054,sequenced,False,False,ERS224119,LUA054,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0016-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0016-Cx.bam,b6b3651834ba66a7f1410d8c9f7b3bd8,https://vo_agam_output.cog.sanger.ac.uk/AR0016-Cx.vcf.gz,c664a019cca194a5603a6af8d3bf6bce,https://vo_agam_output.cog.sanger.ac.uk/AR0016-Cx.gatk.zarr.zip,e338c564b1b0a4e76c1245807abc6490,vr-pipe-vo-agam,AR0016,sequenced,False,False,ERS224127,LUA016,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0050-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0050-Cx.bam,bb6f0079ba1e303fa11d60aed289fe75,https://vo_agam_output.cog.sanger.ac.uk/AR0050-Cx.vcf.gz,555d1acb7d9862f6d2021f6a0334138f,https://vo_agam_output.cog.sanger.ac.uk/AR0050-Cx.gatk.zarr.zip,89cdaa120d93be1524eb1e9268d7af92,vr-pipe-vo-agam,AR0050,sequenced,False,False,ERS224177,LUA050,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0069-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0069-Cx.bam,42a9453bc4906369b1600c87db66b246,https://vo_agam_output.cog.sanger.ac.uk/AR0069-Cx.vcf.gz,630dfac2a07fca4ba92717204f1f6e21,https://vo_agam_output.cog.sanger.ac.uk/AR0069-Cx.gatk.zarr.zip,dd1fbc0bb7a843f9889775b1692062e8,vr-pipe-vo-agam,AR0069,sequenced,False,False,ERS224183,LUA069,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO 
+AR0018-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0018-Cx.bam,906d0ac9fda0d8dcc4a2bf410fd578ef,https://vo_agam_output.cog.sanger.ac.uk/AR0018-Cx.vcf.gz,9e74a80493ff933f448db763d2f551d8,https://vo_agam_output.cog.sanger.ac.uk/AR0018-Cx.gatk.zarr.zip,2d00cfb69b932fe45a79ec442095a84e,vr-pipe-vo-agam,AR0018,sequenced,False,False,ERS224234,LUA018,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0081-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0081-Cx.bam,3bc94bfc1ea852a8582a3a2b178ef2e0,https://vo_agam_output.cog.sanger.ac.uk/AR0081-Cx.vcf.gz,89f5834761a0f0adb15d6788eb2ae37e,https://vo_agam_output.cog.sanger.ac.uk/AR0081-Cx.gatk.zarr.zip,d7b6070a2c16f5fe1f3f2eb1b840a59b,vr-pipe-vo-agam,AR0081,sequenced,False,False,ERS224268,LUA081,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0013-Cx,https://vo_agam_output.cog.sanger.ac.uk/AR0013-Cx.bam,3ec2457474e62ee3bb39eec5dff12cf4,https://vo_agam_output.cog.sanger.ac.uk/AR0013-Cx.vcf.gz,bbca3b0dcde729e456ba463bcbb71d28,https://vo_agam_output.cog.sanger.ac.uk/AR0013-Cx.gatk.zarr.zip,1136a4f2c5a7ccbcc42a4f33623ceb3a,vr-pipe-vo-agam,AR0013,sequenced,False,False,ERS224323,LUA013,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0026-C,https://vo_agam_output.cog.sanger.ac.uk/AR0026-C.bam,e13233379b59953e2807a1fae60bccd7,https://vo_agam_output.cog.sanger.ac.uk/AR0026-C.vcf.gz,5fdeb1c1be67d6d49dcdaeb8b601de0b,https://vo_agam_output.cog.sanger.ac.uk/AR0026-C.gatk.zarr.zip,9bd7bb1fac921979cd45cfca11a7bc14,vr-pipe-vo-agam,AR0026,sequenced,True,True,ERS254305,LUA026,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO +AR0053-C,https://vo_agam_output.cog.sanger.ac.uk/AR0053-C.bam,7019a711e42fd59a155aff8944f993f3,https://vo_agam_output.cog.sanger.ac.uk/AR0053-C.vcf.gz,8eb0c4e09aebbf9e3eeac27d36f2d0ec,https://vo_agam_output.cog.sanger.ac.uk/AR0053-C.gatk.zarr.zip,7512126b12670fd84a4a3bc85b032305,vr-pipe-vo-agam,AR0053,sequenced,True,True,ERS254306,LUA053,Joao Pinto,Angola,Luanda,2009,4,AG1000G-AO diff --git 
a/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/samples.meta.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/samples.meta.csv new file mode 100644 index 000000000..0e6efc095 --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/samples.meta.csv @@ -0,0 +1,182 @@ +sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call +AB0085-Cx,BF2-4,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0086-Cx,BF2-6,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0087-C,BF3-3,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0088-C,BF3-5,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0089-Cx,BF3-8,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0090-C,BF3-10,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0091-C,BF3-12,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0092-C,BF3-13,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0094-Cx,BF3-17,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0095-Cx,BF4-1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0096-C,BF4-4,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0097-Cx,BF4-5,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0098-Cx,BF4-6,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0099-Cx,BF6-1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0100-C,BF6-2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0101-C,BF6-3,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0103-C,BF6-7,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0104-Cx,BF6-8,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0109-C,BF9-1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0110-C,BF9-2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F 
+AB0111-C,BF9-3,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0112-C,BF3-18,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0113-C,BF3-19,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0114-C,BF4-7,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0115-C,BF4-8,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0116-C,BF4-10,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0117-C,BF3-21,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0118-C,BF3-22,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0119-C,BF3-23,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0121-C,BF3-25,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0122-C,BF3-26,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0123-C,BF3-27,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0124-C,BF3-28,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0126-Cx,BF3-30,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0127-C,BF3-31,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0128-C,BF3-32,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0129-C,BF3-33,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0130-Cx,BF3-34,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0131-Cx,BF3-35,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0132-C,BF2-13,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0133-C,BF2-14,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0134-C,BF2-15,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0135-C,BF2-16,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0136-Cx,BF2-17,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0137-Cx,BF2-18,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0138-Cx,BF2-19,Austin 
Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0139-C,BF2-20,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0140-C,BF2-21,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0142-C,BF2-23,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0143-Cx,BF8-1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0144-C,BF8-2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0145-C,BF8-3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0146-Cx,BF8-5,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0147-C,BF8-6,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0148-Cx,BF8-7,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0150-Cx,BF3-37,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0151-Cx,BF3-38,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0153-C,BF3-40,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0155-Cx,BF3-42,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0157-Cx,BF3-44,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0158-Cx,BF3-45,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0159-C,BF3-46,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0160-Cx,BF3-47,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0161-C,BF3-48,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,M +AB0162-C,BF3-54,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0164-C,BF3-56,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0165-C,BF3-59,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0166-C,BF3-60,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0167-C,BF3-61,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0169-C,BF3-64,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0170-C,BF3-65,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0171-Cx,BF3-66,Austin 
Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,M +AB0172-Cx,BF10-1,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0173-C,BF10-2,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0174-C,BF10-3,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0175-Cx,BF10-4,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0176-Cx,BF10-5,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0177-C,BF10-7,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0178-C,BF10-8,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0179-Cx,BF10-9,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0180-Cx,BF10-10,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0181-C,BFBana 4.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0182-C,BFBana 4.3,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0183-C,BFBana5.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0184-C,BFBana 5.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0185-Cx,BFBana 6.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0186-C,BFBana 6.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0187-C,BFBana 7.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0188-C,BFBana 7.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0189-C,BFBana 8.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0190-C,BFBana 8.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0191-Cx,BFBana 17.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0192-C,BFBana 17.3,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0193-C,BFBana 18.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0194-C,BFBana 18.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0196-C,BFBana 19.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0197-C,BFPala 36.1,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F 
+AB0198-C,BFPala 36.2,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0199-C,BFPala 36.3,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0200-C,BFPala 36.4,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0201-Cx,BFPala 48.1,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0202-Cx,BFPala 48.2,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0203-C,BFPala 48.3,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0204-C,BFPala 58.1,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0205-C,BFPala 58.2,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0206-C,BFPala 58.3,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0207-C,BFSour 59.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0208-C,BFSour 59.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0209-C,BFBana 14.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0210-C,BFBana 14.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0211-C,BFBana 16.1,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0212-C,BFBana 16.2,Austin Burt,Burkina Faso,Bana Village,2012,7,11.233,-4.472,F +AB0213-C,BFSour 59.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0214-C,BFSour 46.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0215-C,BFSour 46.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0216-C,BFSour 47.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0217-C,BFSour 54.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0218-C,BFSour 54.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0219-Cx,BFSour 54.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0221-C,BFSour 55.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0222-C,BFSour 55.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0223-C,BFSour 56.1,Austin Burt,Burkina 
Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0224-C,BFSour 57.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0225-C,BFSour 57.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0226-C,BFSour 57.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0227-Cx,BF11-2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0228-C,BF11-3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0229-C,BF11-4,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0230-Cx,BF11-6,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0231-C,BF11-7,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0232-Cx,BF11-8,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0233-C,BF11-9,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0234-C,BF11-10,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0235-C,BF11-11,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0236-Cx,BF11-12,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0237-C,BF11-13,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0238-C,BF11-14,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0239-C,BF11-15,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0240-C,BF11-16,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0241-C,BF11-17,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0242-C,BF11-18,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0243-C,BF11-19,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0244-C,BF11-20,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0246-C,BF11-22,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0247-C,BF11-23,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0248-C,BF11-25,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F 
+AB0249-Cx,BF11-26,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0250-Cx,BF11-27,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0251-C,BF11-28,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0252-C,BF11-29,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0253-C,BF11-30,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,11.238,-4.235,F +AB0255-C,BF12-2,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0256-C,BF12-3,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0257-C,BF12-4,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0258-Cx,BF12-5,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0259-Cx,BF12-6,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0260-C,BF12-7,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0261-Cx,BF12-8,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0262-Cx,BF12-9,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0263-C,BF12-10,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0264-C,BF12-13,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0265-C,BF12-14,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0266-Cx,BF12-15,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0267-C,BF12-16,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0268-C,BF12-17,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0269-Cx,BF12-18,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0270-C,BF12-20,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0271-Cx,BF12-22,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0272-Cx,BF12-23,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0273-Cx,BF12-24,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0274-C,BF12-25,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0275-C,BF12-26,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0276-C,BF12-27,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0277-C,BF12-28,Austin 
Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0278-C,BF12-29,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0279-C,BF12-30,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0280-Cx,BF12-31,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0281-Cx,BF12-32,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0282-Cx,BF12-33,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0283-C,BF10-12,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F +AB0284-C,BF10-13,Austin Burt,Burkina Faso,Pala,2012,7,11.151,-4.235,F diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/wgs_snp_data.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/wgs_snp_data.csv new file mode 100644 index 000000000..cd8f62db7 --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/general/AG1000G-BF-A/wgs_snp_data.csv @@ -0,0 +1,182 @@ +sample_id,alignments_bam,alignments_bam_md5,snp_genotypes_vcf,snp_genotypes_vcf_md5,snp_genotypes_zarr,snp_genotypes_zarr_md5,pipeline_version,original_sample_id,seq_state,build_phase1,build_phase2,ebi_sample_acc,partner_sample_id,contributor,country,location,year,month,sample_set +AB0085-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0085-Cx.bam,c0549b637164cbdc983fdee22cf1c1a4,https://vo_agam_output.cog.sanger.ac.uk/AB0085-Cx.vcf.gz,acbfbab97274ef0b7ef35ebc2aa42600,https://vo_agam_output.cog.sanger.ac.uk/AB0085-Cx.gatk.zarr.zip,f34785728d07a0acec4a65e8822c982c,vr-pipe-vo-agam,AB0085,sequenced,False,False,ERS223996,BF2-4,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0086-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0086-Cx.bam,b61bae3f79f99c586407a1c7709ff75c,https://vo_agam_output.cog.sanger.ac.uk/AB0086-Cx.vcf.gz,c023416e23b47a1ec6f075af93edf1f7,https://vo_agam_output.cog.sanger.ac.uk/AB0086-Cx.gatk.zarr.zip,04e7c550d6af7b8c5e72791dc5ce2379,vr-pipe-vo-agam,AB0086,sequenced,False,False,ERS223972,BF2-6,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0087-C,https://vo_agam_output.cog.sanger.ac.uk/AB0087-C.bam,cc45af0139f14ef7e2372f533206c054,https://vo_agam_output.cog.sanger.ac.uk/AB0087-C.vcf.gz,312e68628c23179dcdfa90b547d29e77,https://vo_agam_output.cog.sanger.ac.uk/AB0087-C.gatk.zarr.zip,ade9215ea424fe49fd7c807489b6bf95,vr-pipe-vo-agam,AB0087,sequenced,True,True,ERS224013,BF3-3,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0088-C,https://vo_agam_output.cog.sanger.ac.uk/AB0088-C.bam,109c038effde580f4dd441822a888d8a,https://vo_agam_output.cog.sanger.ac.uk/AB0088-C.vcf.gz,5de383bf061c3d6585a5efd1dc8b1bbe,https://vo_agam_output.cog.sanger.ac.uk/AB0088-C.gatk.zarr.zip,9245872c9486d0b3ca7ec56bf81af880,vr-pipe-vo-agam,AB0088,sequenced,True,True,ERS223991,BF3-5,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0089-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0089-Cx.bam,982a7475688988baf3641a7fb78478fe,https://vo_agam_output.cog.sanger.ac.uk/AB0089-Cx.vcf.gz,8840bcaf1e11ad9584fd5388cb8a0d3f,https://vo_agam_output.cog.sanger.ac.uk/AB0089-Cx.gatk.zarr.zip,3b8a93225b81b3a106b36dd37d785cac,vr-pipe-vo-agam,AB0089,sequenced,False,False,ERS224031,BF3-8,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0090-C,https://vo_agam_output.cog.sanger.ac.uk/AB0090-C.bam,ebc1fd4b900cb167385db562e36be821,https://vo_agam_output.cog.sanger.ac.uk/AB0090-C.vcf.gz,a8f4ddaeb0e8002c7f8dcaaa94640b79,https://vo_agam_output.cog.sanger.ac.uk/AB0090-C.gatk.zarr.zip,f9c9bf08bbbfa6744522bc2a3d595ae8,vr-pipe-vo-agam,AB0090,sequenced,True,True,ERS223936,BF3-10,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0091-C,https://vo_agam_output.cog.sanger.ac.uk/AB0091-C.bam,36fbf2172c59abd0a1b2b86ce17a7c2c,https://vo_agam_output.cog.sanger.ac.uk/AB0091-C.vcf.gz,f837f59a6cbffee1ae10ab47e6641b16,https://vo_agam_output.cog.sanger.ac.uk/AB0091-C.gatk.zarr.zip,f177fe636f341ff320c7aee45970026b,vr-pipe-vo-agam,AB0091,sequenced,True,True,ERS224065,BF3-12,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0092-C,https://vo_agam_output.cog.sanger.ac.uk/AB0092-C.bam,5740aa26ca7579f7fb132508dbe04716,https://vo_agam_output.cog.sanger.ac.uk/AB0092-C.vcf.gz,11422918ee9634ca7779bfb0db7c455a,https://vo_agam_output.cog.sanger.ac.uk/AB0092-C.gatk.zarr.zip,d43de75d1324458a40995baec1ad4f48,vr-pipe-vo-agam,AB0092,sequenced,True,True,ERS224043,BF3-13,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0094-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0094-Cx.bam,e7ebeb4b03367691924d6a530f628544,https://vo_agam_output.cog.sanger.ac.uk/AB0094-Cx.vcf.gz,2f2972716d6f1f0650de288579bb5673,https://vo_agam_output.cog.sanger.ac.uk/AB0094-Cx.gatk.zarr.zip,92f3aaece851cf5f2727ec9c6e10eb02,vr-pipe-vo-agam,AB0094,sequenced,False,False,ERS224057,BF3-17,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0095-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0095-Cx.bam,fba63f347d443342ae23fd38f3ca9c16,https://vo_agam_output.cog.sanger.ac.uk/AB0095-Cx.vcf.gz,60289e425f6e04fcfa8cd88632c75f55,https://vo_agam_output.cog.sanger.ac.uk/AB0095-Cx.gatk.zarr.zip,992e8624406eb50937f51fe8b28f7d5a,vr-pipe-vo-agam,AB0095,sequenced,False,False,ERS224021,BF4-1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0096-C,https://vo_agam_output.cog.sanger.ac.uk/AB0096-C.bam,7bfa3745ce661c04b6a1d8662fcad46d,https://vo_agam_output.cog.sanger.ac.uk/AB0096-C.vcf.gz,9d475d8c73a5ff39e95b500f6eb848de,https://vo_agam_output.cog.sanger.ac.uk/AB0096-C.gatk.zarr.zip,f27f4dc699e71e01e80c0afb895c6048,vr-pipe-vo-agam,AB0096,sequenced,False,True,ERS223998,BF4-4,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0097-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0097-Cx.bam,feb5c37b6fe37c258516249af73ab0ea,https://vo_agam_output.cog.sanger.ac.uk/AB0097-Cx.vcf.gz,37eaae2c7532b9a3351e8f18ab3f6607,https://vo_agam_output.cog.sanger.ac.uk/AB0097-Cx.gatk.zarr.zip,898e4601e2fe16a6256b81533435d35b,vr-pipe-vo-agam,AB0097,sequenced,False,False,ERS224030,BF4-5,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0098-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0098-Cx.bam,5ccbdbfa0fa4ae6aba9d4b1990ceadd2,https://vo_agam_output.cog.sanger.ac.uk/AB0098-Cx.vcf.gz,880e6f24a2f7db3ca6a85ac37b4e56b1,https://vo_agam_output.cog.sanger.ac.uk/AB0098-Cx.gatk.zarr.zip,44de588d70f094ac0580a0be793f6ac7,vr-pipe-vo-agam,AB0098,sequenced,False,False,ERS223999,BF4-6,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0099-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0099-Cx.bam,7cb7e8e7930cddf991ad062ebc5b28e3,https://vo_agam_output.cog.sanger.ac.uk/AB0099-Cx.vcf.gz,996c8f7b67dbd9dedb93edc163534595,https://vo_agam_output.cog.sanger.ac.uk/AB0099-Cx.gatk.zarr.zip,610bd6774b0ea7e4294521203316b2be,vr-pipe-vo-agam,AB0099,sequenced,False,False,ERS223941,BF6-1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0100-C,https://vo_agam_output.cog.sanger.ac.uk/AB0100-C.bam,4659a0d5c15f506915c0da49b61b256f,https://vo_agam_output.cog.sanger.ac.uk/AB0100-C.vcf.gz,6dd0fae92ddf7c3952a370721b54a643,https://vo_agam_output.cog.sanger.ac.uk/AB0100-C.gatk.zarr.zip,2662971c614c4ed0acd6291f3159b6e6,vr-pipe-vo-agam,AB0100,sequenced,True,True,ERS224020,BF6-2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0101-C,https://vo_agam_output.cog.sanger.ac.uk/AB0101-C.bam,9a01cb0be6fb6e459e288f3ca9017795,https://vo_agam_output.cog.sanger.ac.uk/AB0101-C.vcf.gz,8a775704296f9d7eadd46cba81d7103c,https://vo_agam_output.cog.sanger.ac.uk/AB0101-C.gatk.zarr.zip,92dafb34b73675676ccde29e00d8cd0c,vr-pipe-vo-agam,AB0101,sequenced,True,True,ERS223977,BF6-3,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0103-C,https://vo_agam_output.cog.sanger.ac.uk/AB0103-C.bam,98906815c6f72ff721fef4cb60d2a6fe,https://vo_agam_output.cog.sanger.ac.uk/AB0103-C.vcf.gz,ca5db645a666245c2e54b73950012c5a,https://vo_agam_output.cog.sanger.ac.uk/AB0103-C.gatk.zarr.zip,931680fc45cdd5a5ab20b9a2e8fbdfa1,vr-pipe-vo-agam,AB0103,sequenced,True,True,ERS223970,BF6-7,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0104-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0104-Cx.bam,8b014dbd9959670218cdb20036b8d168,https://vo_agam_output.cog.sanger.ac.uk/AB0104-Cx.vcf.gz,677a10be7d45a6f8993610031db477c6,https://vo_agam_output.cog.sanger.ac.uk/AB0104-Cx.gatk.zarr.zip,0d20ca10968dcec732a16069756f6347,vr-pipe-vo-agam,AB0104,sequenced,False,False,ERS223976,BF6-8,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0109-C,https://vo_agam_output.cog.sanger.ac.uk/AB0109-C.bam,d0addce873e43a28ce0ddf264732cf31,https://vo_agam_output.cog.sanger.ac.uk/AB0109-C.vcf.gz,9e2aa9df4e973fe87632ed8804a026a4,https://vo_agam_output.cog.sanger.ac.uk/AB0109-C.gatk.zarr.zip,c294349f48fcb7b6ab998816077d9ecb,vr-pipe-vo-agam,AB0109,sequenced,True,True,ERS223925,BF9-1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0110-C,https://vo_agam_output.cog.sanger.ac.uk/AB0110-C.bam,6b9e814b203f8ec10615ac973476f3fd,https://vo_agam_output.cog.sanger.ac.uk/AB0110-C.vcf.gz,ad93dad58bbf9dd85421896f36b2463d,https://vo_agam_output.cog.sanger.ac.uk/AB0110-C.gatk.zarr.zip,db50c352e3d400de5ae375545ad69953,vr-pipe-vo-agam,AB0110,sequenced,True,True,ERS224009,BF9-2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0111-C,https://vo_agam_output.cog.sanger.ac.uk/AB0111-C.bam,9132d3ef656ebb4b0729320354ee3c36,https://vo_agam_output.cog.sanger.ac.uk/AB0111-C.vcf.gz,934b9dfb6633c9b43bd60d306dc50956,https://vo_agam_output.cog.sanger.ac.uk/AB0111-C.gatk.zarr.zip,02d7ce7d07dd68655d668634dc7a5eb8,vr-pipe-vo-agam,AB0111,sequenced,True,True,ERS224056,BF9-3,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0112-C,https://vo_agam_output.cog.sanger.ac.uk/AB0112-C.bam,db93bad4715d9ab29e2faec2a5f6969e,https://vo_agam_output.cog.sanger.ac.uk/AB0112-C.vcf.gz,618cb8caf6fa880ba2e37c7b87182503,https://vo_agam_output.cog.sanger.ac.uk/AB0112-C.gatk.zarr.zip,43aabe6bbfff95aa22de97e00dfbe732,vr-pipe-vo-agam,AB0112,sequenced,True,True,ERS224015,BF3-18,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0113-C,https://vo_agam_output.cog.sanger.ac.uk/AB0113-C.bam,901eadd69e44186d53873598f0055ac5,https://vo_agam_output.cog.sanger.ac.uk/AB0113-C.vcf.gz,73d36d9901c0f0a5b05c039133b7727d,https://vo_agam_output.cog.sanger.ac.uk/AB0113-C.gatk.zarr.zip,943318cd95959a035517cd503f307a66,vr-pipe-vo-agam,AB0113,sequenced,True,True,ERS223984,BF3-19,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0114-C,https://vo_agam_output.cog.sanger.ac.uk/AB0114-C.bam,eb84137bd067f709842d76dfd8e7d487,https://vo_agam_output.cog.sanger.ac.uk/AB0114-C.vcf.gz,e7de588d3c39861467c1014dfd2fcea5,https://vo_agam_output.cog.sanger.ac.uk/AB0114-C.gatk.zarr.zip,c96991c62ac0084cd3153f4340f97338,vr-pipe-vo-agam,AB0114,sequenced,True,True,ERS224023,BF4-7,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0115-C,https://vo_agam_output.cog.sanger.ac.uk/AB0115-C.bam,e83afab6ace566c050c960b73c98726b,https://vo_agam_output.cog.sanger.ac.uk/AB0115-C.vcf.gz,29367ff40d68588f3c2881a0861b86c2,https://vo_agam_output.cog.sanger.ac.uk/AB0115-C.gatk.zarr.zip,01840db7859a21f57cf6e4a191d43baa,vr-pipe-vo-agam,AB0115,sequenced,False,True,ERS224774,BF4-8,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0116-C,https://vo_agam_output.cog.sanger.ac.uk/AB0116-C.bam,3c79aa50b6086272ef4dc070476bdb41,https://vo_agam_output.cog.sanger.ac.uk/AB0116-C.vcf.gz,4368aed28979fa091d33362e32f069e8,https://vo_agam_output.cog.sanger.ac.uk/AB0116-C.gatk.zarr.zip,a41b51e3a54e751e658f86eaaffcaaa7,vr-pipe-vo-agam,AB0116,sequenced,False,True,ERS224024,BF4-10,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0117-C,https://vo_agam_output.cog.sanger.ac.uk/AB0117-C.bam,c1b95e14c946ed71c711ae16251c69d9,https://vo_agam_output.cog.sanger.ac.uk/AB0117-C.vcf.gz,3e4e3858d3b4c93619590cc255050121,https://vo_agam_output.cog.sanger.ac.uk/AB0117-C.gatk.zarr.zip,0c895290f9e5e578491d34a98181f476,vr-pipe-vo-agam,AB0117,sequenced,True,True,ERS224010,BF3-21,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0118-C,https://vo_agam_output.cog.sanger.ac.uk/AB0118-C.bam,3fd581e89389b1b3528582846d6fa67b,https://vo_agam_output.cog.sanger.ac.uk/AB0118-C.vcf.gz,4016a9dde8b135a83f3c0381173b2929,https://vo_agam_output.cog.sanger.ac.uk/AB0118-C.gatk.zarr.zip,1a15ec5680e65522ac6618f871b50b98,vr-pipe-vo-agam,AB0118,sequenced,False,True,ERS224059,BF3-22,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0119-C,https://vo_agam_output.cog.sanger.ac.uk/AB0119-C.bam,2f8f934dbaf8694bff21af53dbfa03be,https://vo_agam_output.cog.sanger.ac.uk/AB0119-C.vcf.gz,485f13c3d6f0267cdc0972d6839571f5,https://vo_agam_output.cog.sanger.ac.uk/AB0119-C.gatk.zarr.zip,1fdf23b18b8d898563762a104ca555b8,vr-pipe-vo-agam,AB0119,sequenced,True,True,ERS224017,BF3-23,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0121-C,https://vo_agam_output.cog.sanger.ac.uk/AB0121-C.bam,486ee1827a9e7020df92536187824cdd,https://vo_agam_output.cog.sanger.ac.uk/AB0121-C.vcf.gz,0b092ca2058c9ab24b656f0924719b81,https://vo_agam_output.cog.sanger.ac.uk/AB0121-C.gatk.zarr.zip,c45cfb5747eb2faffcf0c3bca51d0151,vr-pipe-vo-agam,AB0121,sequenced,True,True,ERS224080,BF3-25,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0122-C,https://vo_agam_output.cog.sanger.ac.uk/AB0122-C.bam,159a757ad38e2002ad13c83cf4fe9438,https://vo_agam_output.cog.sanger.ac.uk/AB0122-C.vcf.gz,b5391f5283ef1e8ccc958e73c4dc43b8,https://vo_agam_output.cog.sanger.ac.uk/AB0122-C.gatk.zarr.zip,2db82024aeea4326396aeb060a05dddf,vr-pipe-vo-agam,AB0122,sequenced,True,True,ERS224064,BF3-26,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0123-C,https://vo_agam_output.cog.sanger.ac.uk/AB0123-C.bam,6697bf0f192f28a606e1ad1058280e41,https://vo_agam_output.cog.sanger.ac.uk/AB0123-C.vcf.gz,71ead5bdb998260d24f5b2cb558dce5e,https://vo_agam_output.cog.sanger.ac.uk/AB0123-C.gatk.zarr.zip,f3922103fbc09835260c52620635e9da,vr-pipe-vo-agam,AB0123,sequenced,True,True,ERS224045,BF3-27,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0124-C,https://vo_agam_output.cog.sanger.ac.uk/AB0124-C.bam,13cdf37f05c0bed414ab7fa7ab839ae2,https://vo_agam_output.cog.sanger.ac.uk/AB0124-C.vcf.gz,2ccd8e0b07ff8a20163b5b2a3356037f,https://vo_agam_output.cog.sanger.ac.uk/AB0124-C.gatk.zarr.zip,6532eae792ef0d81aa684ca8f8bfc61a,vr-pipe-vo-agam,AB0124,sequenced,True,True,ERS224044,BF3-28,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0126-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0126-Cx.bam,e85ee87444c0cc7243f8c89bdbcf20b4,https://vo_agam_output.cog.sanger.ac.uk/AB0126-Cx.vcf.gz,edc074791f0a4da65b0f28ab35b9917f,https://vo_agam_output.cog.sanger.ac.uk/AB0126-Cx.gatk.zarr.zip,55dc8884b0b7c29a4b26eded765663e1,vr-pipe-vo-agam,AB0126,sequenced,False,False,ERS224079,BF3-30,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0127-C,https://vo_agam_output.cog.sanger.ac.uk/AB0127-C.bam,e0aba21eba20d7feca7e6ee1744b94cb,https://vo_agam_output.cog.sanger.ac.uk/AB0127-C.vcf.gz,e0dfd046bfd9b4ae74fc29620a54c4f1,https://vo_agam_output.cog.sanger.ac.uk/AB0127-C.gatk.zarr.zip,f74e4595612a03a25665d4bff066c753,vr-pipe-vo-agam,AB0127,sequenced,True,True,ERS224775,BF3-31,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0128-C,https://vo_agam_output.cog.sanger.ac.uk/AB0128-C.bam,bce9774c8e50a1e7e09485963ed1917c,https://vo_agam_output.cog.sanger.ac.uk/AB0128-C.vcf.gz,3fb3a0c881bc8c3d618fbfd55432857a,https://vo_agam_output.cog.sanger.ac.uk/AB0128-C.gatk.zarr.zip,1c64a976f4f96ccf57ed1069f0b37f81,vr-pipe-vo-agam,AB0128,sequenced,True,True,ERS224011,BF3-32,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0129-C,https://vo_agam_output.cog.sanger.ac.uk/AB0129-C.bam,e0b46e849efaf39b03c8f48554fada6a,https://vo_agam_output.cog.sanger.ac.uk/AB0129-C.vcf.gz,8373cf4c72fc08f28ff81fafe010078f,https://vo_agam_output.cog.sanger.ac.uk/AB0129-C.gatk.zarr.zip,474e277f31ee4af94de7454e8766927c,vr-pipe-vo-agam,AB0129,sequenced,True,True,ERS224050,BF3-33,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0130-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0130-Cx.bam,20f9368759ca37268eb2505fa69d5687,https://vo_agam_output.cog.sanger.ac.uk/AB0130-Cx.vcf.gz,3b39071c2b297ed14a7266d70edda510,https://vo_agam_output.cog.sanger.ac.uk/AB0130-Cx.gatk.zarr.zip,3303624793d5b1d69d21c4ff96709b5d,vr-pipe-vo-agam,AB0130,sequenced,False,False,ERS224053,BF3-34,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0131-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0131-Cx.bam,2c3fc43d1e51b184e2d748f5ece59b02,https://vo_agam_output.cog.sanger.ac.uk/AB0131-Cx.vcf.gz,9a52f2dc06ad44df75e65f4084a5ebff,https://vo_agam_output.cog.sanger.ac.uk/AB0131-Cx.gatk.zarr.zip,4bb3732b8a359498986a019b1ae363b1,vr-pipe-vo-agam,AB0131,sequenced,False,False,ERS224041,BF3-35,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0132-C,https://vo_agam_output.cog.sanger.ac.uk/AB0132-C.bam,86f1808e27f292d788ffb9eafe2a44fa,https://vo_agam_output.cog.sanger.ac.uk/AB0132-C.vcf.gz,c083310263b83fb8ff869fc6bbcdc6ae,https://vo_agam_output.cog.sanger.ac.uk/AB0132-C.gatk.zarr.zip,cf25c9ce9b151da6113ca6333ae509ff,vr-pipe-vo-agam,AB0132,sequenced,True,True,ERS223830,BF2-13,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0133-C,https://vo_agam_output.cog.sanger.ac.uk/AB0133-C.bam,27e2a802af23febfe9a6ea798ef64389,https://vo_agam_output.cog.sanger.ac.uk/AB0133-C.vcf.gz,4f16feb5d45379719b0d7d11abfb1d85,https://vo_agam_output.cog.sanger.ac.uk/AB0133-C.gatk.zarr.zip,8a1b34f929ef25888323b736e382f1b8,vr-pipe-vo-agam,AB0133,sequenced,True,True,ERS224071,BF2-14,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0134-C,https://vo_agam_output.cog.sanger.ac.uk/AB0134-C.bam,dcf0f13f05aaf22cd35352298f3aaaed,https://vo_agam_output.cog.sanger.ac.uk/AB0134-C.vcf.gz,e87b232aae562ab0405e8e3589e91fc0,https://vo_agam_output.cog.sanger.ac.uk/AB0134-C.gatk.zarr.zip,1e2067da891ef771ce5c315320b888cf,vr-pipe-vo-agam,AB0134,sequenced,True,True,ERS224046,BF2-15,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A 
+AB0135-C,https://vo_agam_output.cog.sanger.ac.uk/AB0135-C.bam,ef5e24ba76c954948e66858d52727e8c,https://vo_agam_output.cog.sanger.ac.uk/AB0135-C.vcf.gz,b0e28ba50bb882c07eb1b6c641fe873f,https://vo_agam_output.cog.sanger.ac.uk/AB0135-C.gatk.zarr.zip,b91edfecdaf82c35eecde8280edd252d,vr-pipe-vo-agam,AB0135,sequenced,True,True,ERS224014,BF2-16,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0136-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0136-Cx.bam,96d6a7229d9c818221c5d20ffe09bd07,https://vo_agam_output.cog.sanger.ac.uk/AB0136-Cx.vcf.gz,dbe2976d6b6573542909e348a025c570,https://vo_agam_output.cog.sanger.ac.uk/AB0136-Cx.gatk.zarr.zip,df154b79fae68825ab1a1ce23412b3dc,vr-pipe-vo-agam,AB0136,sequenced,False,False,ERS224063,BF2-17,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0137-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0137-Cx.bam,e13a05649b14775f90beba1550fe3ed4,https://vo_agam_output.cog.sanger.ac.uk/AB0137-Cx.vcf.gz,b292f5f1d3369bef3262b8a52b6c45e9,https://vo_agam_output.cog.sanger.ac.uk/AB0137-Cx.gatk.zarr.zip,dcf0c301ec54e7e340bf35ccba874fac,vr-pipe-vo-agam,AB0137,sequenced,False,False,ERS224008,BF2-18,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0138-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0138-Cx.bam,a9796d3e37dae8eabf0797cf703457cf,https://vo_agam_output.cog.sanger.ac.uk/AB0138-Cx.vcf.gz,080fae5a50336724bff6ec03032edf56,https://vo_agam_output.cog.sanger.ac.uk/AB0138-Cx.gatk.zarr.zip,26220dc8035a377f0d76ce28abfcd99d,vr-pipe-vo-agam,AB0138,sequenced,False,False,ERS224006,BF2-19,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0139-C,https://vo_agam_output.cog.sanger.ac.uk/AB0139-C.bam,b04599a566e48fc853463fc93c0b0bce,https://vo_agam_output.cog.sanger.ac.uk/AB0139-C.vcf.gz,a519f32e22f0227308294da6cd11368d,https://vo_agam_output.cog.sanger.ac.uk/AB0139-C.gatk.zarr.zip,a82af184de6aedaf35ccf04ed9ddb46e,vr-pipe-vo-agam,AB0139,sequenced,True,True,ERS223955,BF2-20,Austin Burt,Burkina 
Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0140-C,https://vo_agam_output.cog.sanger.ac.uk/AB0140-C.bam,ad3f03b2a00977c6016d22f8027e70c2,https://vo_agam_output.cog.sanger.ac.uk/AB0140-C.vcf.gz,977c4a43797011782089b4488e72fb3d,https://vo_agam_output.cog.sanger.ac.uk/AB0140-C.gatk.zarr.zip,d5928b86ff376cde8ade6efcc8b24fbb,vr-pipe-vo-agam,AB0140,sequenced,True,True,ERS223963,BF2-21,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0142-C,https://vo_agam_output.cog.sanger.ac.uk/AB0142-C.bam,eec5c7dbbb552998963511844b8c9636,https://vo_agam_output.cog.sanger.ac.uk/AB0142-C.vcf.gz,bab10508721b427b3861d552c903c49a,https://vo_agam_output.cog.sanger.ac.uk/AB0142-C.gatk.zarr.zip,2d8eeb9b6e1e4ffe694038489d9fcf7d,vr-pipe-vo-agam,AB0142,sequenced,True,True,ERS223982,BF2-23,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0143-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0143-Cx.bam,31971ddfe36b613f93ed358cdd1fabdc,https://vo_agam_output.cog.sanger.ac.uk/AB0143-Cx.vcf.gz,bab1882e5c7bc2d0df168bb1f7dd7eb8,https://vo_agam_output.cog.sanger.ac.uk/AB0143-Cx.gatk.zarr.zip,9f7e841d58ce27f83dfdbc3d55819ae6,vr-pipe-vo-agam,AB0143,sequenced,False,False,ERS224051,BF8-1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0144-C,https://vo_agam_output.cog.sanger.ac.uk/AB0144-C.bam,f5b31abdc65b50b2099a04dccc004f58,https://vo_agam_output.cog.sanger.ac.uk/AB0144-C.vcf.gz,60aa0c05810fcab637f2aeb106038325,https://vo_agam_output.cog.sanger.ac.uk/AB0144-C.gatk.zarr.zip,2afedd7350bcd521db823e459cc1c34c,vr-pipe-vo-agam,AB0144,sequenced,True,True,ERS224035,BF8-2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0145-C,https://vo_agam_output.cog.sanger.ac.uk/AB0145-C.bam,66eeb6cad586f4ba3aa4fb1048c3e72b,https://vo_agam_output.cog.sanger.ac.uk/AB0145-C.vcf.gz,bccd089f624aaaa5fa28f148364a20cb,https://vo_agam_output.cog.sanger.ac.uk/AB0145-C.gatk.zarr.zip,8bea72b35d17348282456245ab3c8463,vr-pipe-vo-agam,AB0145,sequenced,True,True,ERS223962,BF8-3,Austin 
Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0146-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0146-Cx.bam,cf7d5931c606f9076fbed7bd7a361af3,https://vo_agam_output.cog.sanger.ac.uk/AB0146-Cx.vcf.gz,8451f45087044c613048dc66d7b412f5,https://vo_agam_output.cog.sanger.ac.uk/AB0146-Cx.gatk.zarr.zip,82ae777c87908a424404144315b9cd48,vr-pipe-vo-agam,AB0146,sequenced,False,False,ERS224077,BF8-5,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0147-C,https://vo_agam_output.cog.sanger.ac.uk/AB0147-C.bam,6ffeccf156446b28a394221a447c01c7,https://vo_agam_output.cog.sanger.ac.uk/AB0147-C.vcf.gz,0448d1cd3c7bace916cbf50effb0b1b1,https://vo_agam_output.cog.sanger.ac.uk/AB0147-C.gatk.zarr.zip,471c4a1d340ada65ccb7b7cab855b71b,vr-pipe-vo-agam,AB0147,sequenced,True,True,ERS223924,BF8-6,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0148-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0148-Cx.bam,563d8b3320ac79d52c31fc9a80622e0d,https://vo_agam_output.cog.sanger.ac.uk/AB0148-Cx.vcf.gz,e3b59bc65a94ed52da1e09fc836f45ec,https://vo_agam_output.cog.sanger.ac.uk/AB0148-Cx.gatk.zarr.zip,1780e0557ef154fd2d9f72728b2902e4,vr-pipe-vo-agam,AB0148,sequenced,False,False,ERS224076,BF8-7,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0150-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0150-Cx.bam,5c44086af1e8352bc6f8cff7e1724840,https://vo_agam_output.cog.sanger.ac.uk/AB0150-Cx.vcf.gz,a5c33ae9c00cd4f565bae86df011e264,https://vo_agam_output.cog.sanger.ac.uk/AB0150-Cx.gatk.zarr.zip,cd8260d75ef2949c5fd84adc7ecc9532,vr-pipe-vo-agam,AB0150,sequenced,False,False,ERS224038,BF3-37,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0151-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0151-Cx.bam,5e2e25c1e9e03a39fb20f72388809e27,https://vo_agam_output.cog.sanger.ac.uk/AB0151-Cx.vcf.gz,fdbc6a4d7668290869d1bf0f9141b3b5,https://vo_agam_output.cog.sanger.ac.uk/AB0151-Cx.gatk.zarr.zip,10e22c81c0e5f8b1fc633dc26392e1e6,vr-pipe-vo-agam,AB0151,sequenced,False,False,ERS224003,BF3-38,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0153-C,https://vo_agam_output.cog.sanger.ac.uk/AB0153-C.bam,142da069d40d8f3e8dbf0ce26e75c185,https://vo_agam_output.cog.sanger.ac.uk/AB0153-C.vcf.gz,4abb6955316b9b6196ed3d0ae4a6e552,https://vo_agam_output.cog.sanger.ac.uk/AB0153-C.gatk.zarr.zip,bba89e70919b21ad663ebd68a4e25715,vr-pipe-vo-agam,AB0153,sequenced,True,True,ERS224776,BF3-40,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0155-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0155-Cx.bam,94655a01cef4c00d4fb05d635406a92e,https://vo_agam_output.cog.sanger.ac.uk/AB0155-Cx.vcf.gz,e577d7eb99aaf21aacd6a3cf14940a85,https://vo_agam_output.cog.sanger.ac.uk/AB0155-Cx.gatk.zarr.zip,11d392fd56ddf4df11cb99ede05ee172,vr-pipe-vo-agam,AB0155,sequenced,False,False,ERS224066,BF3-42,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0157-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0157-Cx.bam,87e8b2d91d133b3ae41d1447dbab7c9d,https://vo_agam_output.cog.sanger.ac.uk/AB0157-Cx.vcf.gz,98d2a0e25b8c10e02b69ed751f0e3272,https://vo_agam_output.cog.sanger.ac.uk/AB0157-Cx.gatk.zarr.zip,9d64cc16824be09abbb9ec0836aff157,vr-pipe-vo-agam,AB0157,sequenced,False,False,ERS224058,BF3-44,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0158-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0158-Cx.bam,f54057307eeca18180d305f415d2acc5,https://vo_agam_output.cog.sanger.ac.uk/AB0158-Cx.vcf.gz,545b2e5600cb6634c0c67cb82a2ab2d0,https://vo_agam_output.cog.sanger.ac.uk/AB0158-Cx.gatk.zarr.zip,e53f93c3272d3760e33f2c67656bfc19,vr-pipe-vo-agam,AB0158,sequenced,False,False,ERS224016,BF3-45,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0159-C,https://vo_agam_output.cog.sanger.ac.uk/AB0159-C.bam,883accdfa1a712a9aae3017a6b62ae16,https://vo_agam_output.cog.sanger.ac.uk/AB0159-C.vcf.gz,5cdd871c9ed510da87a23a7b9dbee5b6,https://vo_agam_output.cog.sanger.ac.uk/AB0159-C.gatk.zarr.zip,6e67a2f31b2d18eab8f1616a41fb4403,vr-pipe-vo-agam,AB0159,sequenced,True,True,ERS224770,BF3-46,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0160-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0160-Cx.bam,0522ef18268de52d3a37152714174789,https://vo_agam_output.cog.sanger.ac.uk/AB0160-Cx.vcf.gz,37f53df0714c6115251ba6c55c7d6ce5,https://vo_agam_output.cog.sanger.ac.uk/AB0160-Cx.gatk.zarr.zip,92a9a4ccc671b5de1f423e4cdd3ad15c,vr-pipe-vo-agam,AB0160,sequenced,False,False,ERS224032,BF3-47,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0161-C,https://vo_agam_output.cog.sanger.ac.uk/AB0161-C.bam,3e706172916a0fe0c27559b2e6de444d,https://vo_agam_output.cog.sanger.ac.uk/AB0161-C.vcf.gz,ce031356c5f9b6633954a52ed294fad4,https://vo_agam_output.cog.sanger.ac.uk/AB0161-C.gatk.zarr.zip,f05f265f011e88f27ccc37907addd8b2,vr-pipe-vo-agam,AB0161,sequenced,True,True,ERS224771,BF3-48,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0162-C,https://vo_agam_output.cog.sanger.ac.uk/AB0162-C.bam,8ad0389766f04338aad6f743068f4426,https://vo_agam_output.cog.sanger.ac.uk/AB0162-C.vcf.gz,d8fbdd58205673d5b60b7660da6727d2,https://vo_agam_output.cog.sanger.ac.uk/AB0162-C.gatk.zarr.zip,7737d940a303844c00a00e7f8f588f38,vr-pipe-vo-agam,AB0162,sequenced,False,True,ERS224773,BF3-54,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0164-C,https://vo_agam_output.cog.sanger.ac.uk/AB0164-C.bam,4b5ec5a160f8bb83bb43fc30db18bcd8,https://vo_agam_output.cog.sanger.ac.uk/AB0164-C.vcf.gz,17ddaf788d3f2843c76283630e3f6b37,https://vo_agam_output.cog.sanger.ac.uk/AB0164-C.gatk.zarr.zip,9c1b285f1dc5c6253068a337f2efcc55,vr-pipe-vo-agam,AB0164,sequenced,True,True,ERS224060,BF3-56,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0165-C,https://vo_agam_output.cog.sanger.ac.uk/AB0165-C.bam,040e83fd535459aea6f758d24595357d,https://vo_agam_output.cog.sanger.ac.uk/AB0165-C.vcf.gz,1c35c3c1c499a57fa861f7b191d0b793,https://vo_agam_output.cog.sanger.ac.uk/AB0165-C.gatk.zarr.zip,879f331e2b6d38737cb8d5a44bd7a61f,vr-pipe-vo-agam,AB0165,sequenced,False,True,ERS224777,BF3-59,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0166-C,https://vo_agam_output.cog.sanger.ac.uk/AB0166-C.bam,5b647bdb0da126e63982a5e75c8c5707,https://vo_agam_output.cog.sanger.ac.uk/AB0166-C.vcf.gz,01454ac1d7cf00b765bb1238508b448d,https://vo_agam_output.cog.sanger.ac.uk/AB0166-C.gatk.zarr.zip,44a8197eaec1d2e8cb0e98cc2f276145,vr-pipe-vo-agam,AB0166,sequenced,True,True,ERS224004,BF3-60,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0167-C,https://vo_agam_output.cog.sanger.ac.uk/AB0167-C.bam,9e82a4e278b869caba16c7d0942535be,https://vo_agam_output.cog.sanger.ac.uk/AB0167-C.vcf.gz,195d71f3164549c2dee1b5dc66be5dd7,https://vo_agam_output.cog.sanger.ac.uk/AB0167-C.gatk.zarr.zip,09e630a4494308bd685e35e3c9cf3338,vr-pipe-vo-agam,AB0167,sequenced,False,True,ERS224779,BF3-61,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0169-C,https://vo_agam_output.cog.sanger.ac.uk/AB0169-C.bam,e58be5c8d58b020fe81d4d90f4a3c93e,https://vo_agam_output.cog.sanger.ac.uk/AB0169-C.vcf.gz,269975feb61d9f2bff3d94afdb337620,https://vo_agam_output.cog.sanger.ac.uk/AB0169-C.gatk.zarr.zip,2821a9e4ee648a3676db8430d4d254d2,vr-pipe-vo-agam,AB0169,sequenced,True,True,ERS224052,BF3-64,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0170-C,https://vo_agam_output.cog.sanger.ac.uk/AB0170-C.bam,c537ddbc70612ca9553d5f7e83ba5c12,https://vo_agam_output.cog.sanger.ac.uk/AB0170-C.vcf.gz,b8d793a87abf3765c8ce486098c8b05a,https://vo_agam_output.cog.sanger.ac.uk/AB0170-C.gatk.zarr.zip,c9cb894c2c002f151cbc95e8659aca30,vr-pipe-vo-agam,AB0170,sequenced,True,True,ERS224018,BF3-65,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0171-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0171-Cx.bam,0b9e7ab3243ce2cc14a6f9a6ff275794,https://vo_agam_output.cog.sanger.ac.uk/AB0171-Cx.vcf.gz,9b1063416db8ccc6908bafea34e5f75d,https://vo_agam_output.cog.sanger.ac.uk/AB0171-Cx.gatk.zarr.zip,bd09d0f5c3480d41eb38dce23d79f626,vr-pipe-vo-agam,AB0171,sequenced,False,False,ERS224067,BF3-66,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0172-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0172-Cx.bam,ff158aa8cf87bc4ba30590dd4328d10d,https://vo_agam_output.cog.sanger.ac.uk/AB0172-Cx.vcf.gz,238287e46f913496aea61a17d323cd56,https://vo_agam_output.cog.sanger.ac.uk/AB0172-Cx.gatk.zarr.zip,a458d0df047501747e6c9436f552ebb4,vr-pipe-vo-agam,AB0172,sequenced,False,False,ERS223916,BF10-1,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0173-C,https://vo_agam_output.cog.sanger.ac.uk/AB0173-C.bam,6efa3e445b39b8df9fc1109ca6d325a7,https://vo_agam_output.cog.sanger.ac.uk/AB0173-C.vcf.gz,635a4721399b5d80bb1272b423e4577c,https://vo_agam_output.cog.sanger.ac.uk/AB0173-C.gatk.zarr.zip,b1dd1606d14041da9bc9c00ff8766452,vr-pipe-vo-agam,AB0173,sequenced,True,True,ERS224772,BF10-2,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0174-C,https://vo_agam_output.cog.sanger.ac.uk/AB0174-C.bam,317be081731519c3c78ffbe40a031261,https://vo_agam_output.cog.sanger.ac.uk/AB0174-C.vcf.gz,f43cb6c325c971b1a1d4be6baff604a7,https://vo_agam_output.cog.sanger.ac.uk/AB0174-C.gatk.zarr.zip,f9c8b5b5df3f72c4adb2cc25f2e21d81,vr-pipe-vo-agam,AB0174,sequenced,True,True,ERS224039,BF10-3,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0175-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0175-Cx.bam,a74cc06428d70d6823951597016a443c,https://vo_agam_output.cog.sanger.ac.uk/AB0175-Cx.vcf.gz,b836bbdf4eaeffa03422c31dce1a8488,https://vo_agam_output.cog.sanger.ac.uk/AB0175-Cx.gatk.zarr.zip,dbe10e3b65bbff0771859e5b042d7a03,vr-pipe-vo-agam,AB0175,sequenced,False,False,ERS224070,BF10-4,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0176-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0176-Cx.bam,fa05c695b6dc2eb35acb483ad94a13f3,https://vo_agam_output.cog.sanger.ac.uk/AB0176-Cx.vcf.gz,e9b7b090b651aa5a1f7e0da9af50a10e,https://vo_agam_output.cog.sanger.ac.uk/AB0176-Cx.gatk.zarr.zip,3ac66abc4919e21cbda88ea2e30c803b,vr-pipe-vo-agam,AB0176,sequenced,False,False,ERS224022,BF10-5,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0177-C,https://vo_agam_output.cog.sanger.ac.uk/AB0177-C.bam,0ec2e31970e6db2544536ec1a14d74bf,https://vo_agam_output.cog.sanger.ac.uk/AB0177-C.vcf.gz,ba6f516b911e0d4496c7c725c359a100,https://vo_agam_output.cog.sanger.ac.uk/AB0177-C.gatk.zarr.zip,c1e73cca4fe6b19f6de1bb2b63078e00,vr-pipe-vo-agam,AB0177,sequenced,True,True,ERS224778,BF10-7,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0178-C,https://vo_agam_output.cog.sanger.ac.uk/AB0178-C.bam,3cb7df20803b6ed813926b809fb55231,https://vo_agam_output.cog.sanger.ac.uk/AB0178-C.vcf.gz,4083741231ca68b6c5d6b77102d7cd31,https://vo_agam_output.cog.sanger.ac.uk/AB0178-C.gatk.zarr.zip,557c3c0abf25895d1fea142eca10ece6,vr-pipe-vo-agam,AB0178,sequenced,True,True,ERS224074,BF10-8,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0179-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0179-Cx.bam,f7cd9c801f7ed3bc55af1946f8b61b99,https://vo_agam_output.cog.sanger.ac.uk/AB0179-Cx.vcf.gz,7bb255345a478e9069e45ae696af518d,https://vo_agam_output.cog.sanger.ac.uk/AB0179-Cx.gatk.zarr.zip,483615b1d1d4987ea0a3f6d82181b774,vr-pipe-vo-agam,AB0179,sequenced,False,False,ERS224055,BF10-9,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0180-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0180-Cx.bam,0e6aca3ab613ee88e55b73410ead630d,https://vo_agam_output.cog.sanger.ac.uk/AB0180-Cx.vcf.gz,1adb03f61bf3ca5fd1db9b2eded83296,https://vo_agam_output.cog.sanger.ac.uk/AB0180-Cx.gatk.zarr.zip,93d6e5fec65d3d4e914fe86bb612854f,vr-pipe-vo-agam,AB0180,sequenced,False,False,ERS224028,BF10-10,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0181-C,https://vo_agam_output.cog.sanger.ac.uk/AB0181-C.bam,93c58aea3036cbb730eff59a5426462d,https://vo_agam_output.cog.sanger.ac.uk/AB0181-C.vcf.gz,8f7bf82131cad90255b611fff8bd7f86,https://vo_agam_output.cog.sanger.ac.uk/AB0181-C.gatk.zarr.zip,6e625808a2aad13f50b772596837630a,vr-pipe-vo-agam,AB0181,sequenced,True,True,ERS223761,BFBana 4.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0182-C,https://vo_agam_output.cog.sanger.ac.uk/AB0182-C.bam,db01658baf176a51693271b4af7e0482,https://vo_agam_output.cog.sanger.ac.uk/AB0182-C.vcf.gz,02ee17fa64dffc8701acd599e2737cc7,https://vo_agam_output.cog.sanger.ac.uk/AB0182-C.gatk.zarr.zip,214faadaee52df9b35be6fd867f198f6,vr-pipe-vo-agam,AB0182,sequenced,True,True,ERS223922,BFBana 4.3,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0183-C,https://vo_agam_output.cog.sanger.ac.uk/AB0183-C.bam,5402b8fed42061df04e0fd7b08039be6,https://vo_agam_output.cog.sanger.ac.uk/AB0183-C.vcf.gz,1b081777e98b1a1f855fc3de92e1f85d,https://vo_agam_output.cog.sanger.ac.uk/AB0183-C.gatk.zarr.zip,30681acc63b2b2160cdb328f8d825f9d,vr-pipe-vo-agam,AB0183,sequenced,True,True,ERS223994,BFBana5.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0184-C,https://vo_agam_output.cog.sanger.ac.uk/AB0184-C.bam,aa7332b5fb417090a4f876214c336a95,https://vo_agam_output.cog.sanger.ac.uk/AB0184-C.vcf.gz,ca8e58c274635b0e1ffd229da47a7f99,https://vo_agam_output.cog.sanger.ac.uk/AB0184-C.gatk.zarr.zip,fcec063ad420e91d9553322055ce1e0f,vr-pipe-vo-agam,AB0184,sequenced,True,True,ERS223843,BFBana 5.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0185-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0185-Cx.bam,c4c5b5a6be049097e57c86cbfd1ae8ba,https://vo_agam_output.cog.sanger.ac.uk/AB0185-Cx.vcf.gz,c98ec91fcb1d20fe52c16f38f6227e98,https://vo_agam_output.cog.sanger.ac.uk/AB0185-Cx.gatk.zarr.zip,71ff7326edf0acb9fc083fe2f3a8c1b9,vr-pipe-vo-agam,AB0185,sequenced,False,False,ERS223844,BFBana 6.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0186-C,https://vo_agam_output.cog.sanger.ac.uk/AB0186-C.bam,e84af3370df468c51972bfa960a97f1d,https://vo_agam_output.cog.sanger.ac.uk/AB0186-C.vcf.gz,08f6eb5adea25ba036e130aadc5af681,https://vo_agam_output.cog.sanger.ac.uk/AB0186-C.gatk.zarr.zip,f123fd407fa64dd1fcbeb6ea9b6d25e5,vr-pipe-vo-agam,AB0186,sequenced,True,True,ERS223938,BFBana 6.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0187-C,https://vo_agam_output.cog.sanger.ac.uk/AB0187-C.bam,97397018da241385ae1325adfe462ea5,https://vo_agam_output.cog.sanger.ac.uk/AB0187-C.vcf.gz,fa8da7453cbac76194459265d39801f8,https://vo_agam_output.cog.sanger.ac.uk/AB0187-C.gatk.zarr.zip,7edabaab4c2a7a2d25e8c1a274a22a92,vr-pipe-vo-agam,AB0187,sequenced,True,True,ERS223971,BFBana 7.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0188-C,https://vo_agam_output.cog.sanger.ac.uk/AB0188-C.bam,bc0b2b30e3bd6bf7b319a0288153ccb7,https://vo_agam_output.cog.sanger.ac.uk/AB0188-C.vcf.gz,8d14a50f2629c316f34bdd3b9c652570,https://vo_agam_output.cog.sanger.ac.uk/AB0188-C.gatk.zarr.zip,ecb5ea7e1f7d77ca3f577312116a75a9,vr-pipe-vo-agam,AB0188,sequenced,True,True,ERS223923,BFBana 7.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0189-C,https://vo_agam_output.cog.sanger.ac.uk/AB0189-C.bam,e6cd4d81cccfe886be1907e70c0fed18,https://vo_agam_output.cog.sanger.ac.uk/AB0189-C.vcf.gz,440851b6d1d71e9947fc0118ef5c071b,https://vo_agam_output.cog.sanger.ac.uk/AB0189-C.gatk.zarr.zip,a6e0f82cf3abfaf41d1d261223144222,vr-pipe-vo-agam,AB0189,sequenced,True,True,ERS223794,BFBana 8.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0190-C,https://vo_agam_output.cog.sanger.ac.uk/AB0190-C.bam,c53048c860587df2f0ad8e0cdc907851,https://vo_agam_output.cog.sanger.ac.uk/AB0190-C.vcf.gz,2efe2b7a187b83394d9c979a9ccd5150,https://vo_agam_output.cog.sanger.ac.uk/AB0190-C.gatk.zarr.zip,42c604f9516ee8285a6081002e92b255,vr-pipe-vo-agam,AB0190,sequenced,True,True,ERS223749,BFBana 8.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0191-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0191-Cx.bam,7e20cd3ea9789ea48c27f164a7be2a28,https://vo_agam_output.cog.sanger.ac.uk/AB0191-Cx.vcf.gz,0afd5ec41b86d766f010c7be39256b1e,https://vo_agam_output.cog.sanger.ac.uk/AB0191-Cx.gatk.zarr.zip,39a2dcfb1e4976cc44fae3c45d194763,vr-pipe-vo-agam,AB0191,sequenced,False,False,ERS224078,BFBana 17.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0192-C,https://vo_agam_output.cog.sanger.ac.uk/AB0192-C.bam,6a00ba064867381da2a81d6cd9189a83,https://vo_agam_output.cog.sanger.ac.uk/AB0192-C.vcf.gz,e7cbe8b461cc271ad6a8d24460d9d663,https://vo_agam_output.cog.sanger.ac.uk/AB0192-C.gatk.zarr.zip,6d0818ae52e58bd6c3a9b305b0c8e4ee,vr-pipe-vo-agam,AB0192,sequenced,True,True,ERS223933,BFBana 17.3,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0193-C,https://vo_agam_output.cog.sanger.ac.uk/AB0193-C.bam,6754c406437f174fdff3e83bb605f752,https://vo_agam_output.cog.sanger.ac.uk/AB0193-C.vcf.gz,ef5f393c54d7759581c36b575fd89fe1,https://vo_agam_output.cog.sanger.ac.uk/AB0193-C.gatk.zarr.zip,d8484c8136cabc9e6bd08f61f052d014,vr-pipe-vo-agam,AB0193,sequenced,True,True,ERS223940,BFBana 18.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0194-C,https://vo_agam_output.cog.sanger.ac.uk/AB0194-C.bam,707111a2e850a5c7281c539563804e73,https://vo_agam_output.cog.sanger.ac.uk/AB0194-C.vcf.gz,f579147c2ab9c730c180521cde17eb2b,https://vo_agam_output.cog.sanger.ac.uk/AB0194-C.gatk.zarr.zip,fa0c5ac24b9feca169d1fd924295a2a8,vr-pipe-vo-agam,AB0194,sequenced,True,True,ERS223816,BFBana 18.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0196-C,https://vo_agam_output.cog.sanger.ac.uk/AB0196-C.bam,5d3a9731703ba3e2beec653e5dab3c9c,https://vo_agam_output.cog.sanger.ac.uk/AB0196-C.vcf.gz,4a108c352d5fc8ce7ecfe8219119f2ba,https://vo_agam_output.cog.sanger.ac.uk/AB0196-C.gatk.zarr.zip,396fe56ddabf8610bfe9cf1ccb9fbd2c,vr-pipe-vo-agam,AB0196,sequenced,False,True,ERS224007,BFBana 19.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0197-C,https://vo_agam_output.cog.sanger.ac.uk/AB0197-C.bam,c7eb98e3c7d45e5d721500e6b70c582d,https://vo_agam_output.cog.sanger.ac.uk/AB0197-C.vcf.gz,3105d890032e44d67b44fc465b70986d,https://vo_agam_output.cog.sanger.ac.uk/AB0197-C.gatk.zarr.zip,f49e8ee91656682b1788d47129ba9a91,vr-pipe-vo-agam,AB0197,sequenced,True,True,ERS223875,BFPala 36.1,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0198-C,https://vo_agam_output.cog.sanger.ac.uk/AB0198-C.bam,0024629b079c77a5471a7d69fe26e3fe,https://vo_agam_output.cog.sanger.ac.uk/AB0198-C.vcf.gz,4fb2865b352de999b824e35953883c1a,https://vo_agam_output.cog.sanger.ac.uk/AB0198-C.gatk.zarr.zip,bd349fcb9de0337dc1a16ff349ad746a,vr-pipe-vo-agam,AB0198,sequenced,True,True,ERS223967,BFPala 36.2,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0199-C,https://vo_agam_output.cog.sanger.ac.uk/AB0199-C.bam,26bf055b0b48bdc54d3d03ff92dd7226,https://vo_agam_output.cog.sanger.ac.uk/AB0199-C.vcf.gz,2d408abe2b8384bc880958f93874b6d9,https://vo_agam_output.cog.sanger.ac.uk/AB0199-C.gatk.zarr.zip,c9536f9b7e12cd0b4c6fa8673a1982a9,vr-pipe-vo-agam,AB0199,sequenced,True,True,ERS224069,BFPala 36.3,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0200-C,https://vo_agam_output.cog.sanger.ac.uk/AB0200-C.bam,83327b9a02656f6d46669b0a6bdac8bf,https://vo_agam_output.cog.sanger.ac.uk/AB0200-C.vcf.gz,2fa6b3b339b0458adec44a330893229f,https://vo_agam_output.cog.sanger.ac.uk/AB0200-C.gatk.zarr.zip,a11a426ad609c9407be4ef51cce7a40d,vr-pipe-vo-agam,AB0200,sequenced,False,True,ERS224034,BFPala 36.4,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0201-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0201-Cx.bam,94222cb4c78b04bc02f228cfb66ee0af,https://vo_agam_output.cog.sanger.ac.uk/AB0201-Cx.vcf.gz,aa12f744af55b36115e5dbab597ebb90,https://vo_agam_output.cog.sanger.ac.uk/AB0201-Cx.gatk.zarr.zip,0c2c4e12743f2297b08387c3b305f3ed,vr-pipe-vo-agam,AB0201,sequenced,False,False,ERS224000,BFPala 48.1,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0202-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0202-Cx.bam,032e9b45e10efa720cb218bc3b668fa1,https://vo_agam_output.cog.sanger.ac.uk/AB0202-Cx.vcf.gz,6e084d7714e2eb272023c1f52a166724,https://vo_agam_output.cog.sanger.ac.uk/AB0202-Cx.gatk.zarr.zip,4f91e0b24982ed4dfd4bc4b28f0d032f,vr-pipe-vo-agam,AB0202,sequenced,False,False,ERS224042,BFPala 48.2,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0203-C,https://vo_agam_output.cog.sanger.ac.uk/AB0203-C.bam,0ccb0911501b3327b0df191af723f5a0,https://vo_agam_output.cog.sanger.ac.uk/AB0203-C.vcf.gz,c8662253fa394d1efb857e7db6c5f6c9,https://vo_agam_output.cog.sanger.ac.uk/AB0203-C.gatk.zarr.zip,dbdc06b4e99e3c0cc1cc25db5f1462a6,vr-pipe-vo-agam,AB0203,sequenced,True,True,ERS224029,BFPala 48.3,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0204-C,https://vo_agam_output.cog.sanger.ac.uk/AB0204-C.bam,50a7c6f70cbec28d7e468b60839503da,https://vo_agam_output.cog.sanger.ac.uk/AB0204-C.vcf.gz,234578d8e91e3c413ab2019195a7d869,https://vo_agam_output.cog.sanger.ac.uk/AB0204-C.gatk.zarr.zip,4827dfeea69b1cdd4cc194eb086223f9,vr-pipe-vo-agam,AB0204,sequenced,True,True,ERS223946,BFPala 58.1,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0205-C,https://vo_agam_output.cog.sanger.ac.uk/AB0205-C.bam,dab2dc13759e24e2cd40dacc252e06f8,https://vo_agam_output.cog.sanger.ac.uk/AB0205-C.vcf.gz,4f1d2822e579f9a0daddbb584f2454a1,https://vo_agam_output.cog.sanger.ac.uk/AB0205-C.gatk.zarr.zip,764f74cb8cd99f63be0818d2dc1fca80,vr-pipe-vo-agam,AB0205,sequenced,True,True,ERS223790,BFPala 58.2,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0206-C,https://vo_agam_output.cog.sanger.ac.uk/AB0206-C.bam,9e7c02155188a3e7760379ed2f5f5c10,https://vo_agam_output.cog.sanger.ac.uk/AB0206-C.vcf.gz,61eabbc50d12b3ea8c5056bf55ab35ab,https://vo_agam_output.cog.sanger.ac.uk/AB0206-C.gatk.zarr.zip,c2e260f60e3578adc0e2891969b88140,vr-pipe-vo-agam,AB0206,sequenced,True,True,ERS223797,BFPala 58.3,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0207-C,https://vo_agam_output.cog.sanger.ac.uk/AB0207-C.bam,88f15206f0e866b5198506c076a962dc,https://vo_agam_output.cog.sanger.ac.uk/AB0207-C.vcf.gz,c744e942349a97aafebc835f0a1c0804,https://vo_agam_output.cog.sanger.ac.uk/AB0207-C.gatk.zarr.zip,742c68ed32ea1b30e52a47bdd875c47b,vr-pipe-vo-agam,AB0207,sequenced,True,True,ERS223771,BFSour 59.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0208-C,https://vo_agam_output.cog.sanger.ac.uk/AB0208-C.bam,40da85f0e21d9236ce95afe99c07da13,https://vo_agam_output.cog.sanger.ac.uk/AB0208-C.vcf.gz,59fc8c69296fd5174e30cbfc37fc5b7a,https://vo_agam_output.cog.sanger.ac.uk/AB0208-C.gatk.zarr.zip,69045ff17015e080dd2c3ccf7cca0c25,vr-pipe-vo-agam,AB0208,sequenced,True,True,ERS223827,BFSour 59.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0209-C,https://vo_agam_output.cog.sanger.ac.uk/AB0209-C.bam,9015d252822fdb597917793b4a9f8f53,https://vo_agam_output.cog.sanger.ac.uk/AB0209-C.vcf.gz,787737efdce46dcc771ac1db7f2a160a,https://vo_agam_output.cog.sanger.ac.uk/AB0209-C.gatk.zarr.zip,67840ba9295a2e8cb5e9f7cffa4e49d6,vr-pipe-vo-agam,AB0209,sequenced,True,True,ERS223861,BFBana 14.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0210-C,https://vo_agam_output.cog.sanger.ac.uk/AB0210-C.bam,be9aa6b2197548ad17129f2f9a38b7b8,https://vo_agam_output.cog.sanger.ac.uk/AB0210-C.vcf.gz,613440f1ffa9d62507b62b969fc5725e,https://vo_agam_output.cog.sanger.ac.uk/AB0210-C.gatk.zarr.zip,1a48b6a538c9e1d4daf5af8f1df48a0e,vr-pipe-vo-agam,AB0210,sequenced,True,True,ERS223873,BFBana 14.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0211-C,https://vo_agam_output.cog.sanger.ac.uk/AB0211-C.bam,a5bde216670e0dc434d86d806df75a33,https://vo_agam_output.cog.sanger.ac.uk/AB0211-C.vcf.gz,5fb1047239264df249e450d981b8e1a9,https://vo_agam_output.cog.sanger.ac.uk/AB0211-C.gatk.zarr.zip,a49f95f386fb29a63dd21510c78e136a,vr-pipe-vo-agam,AB0211,sequenced,True,True,ERS223759,BFBana 16.1,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A 
+AB0212-C,https://vo_agam_output.cog.sanger.ac.uk/AB0212-C.bam,6ef9fcec68404b4f3e63f8a0f45c70bb,https://vo_agam_output.cog.sanger.ac.uk/AB0212-C.vcf.gz,a045c4c691a7a41f71fa6b709fffc97a,https://vo_agam_output.cog.sanger.ac.uk/AB0212-C.gatk.zarr.zip,7a43b6652bc39418c784cf9ca33f592f,vr-pipe-vo-agam,AB0212,sequenced,True,True,ERS223909,BFBana 16.2,Austin Burt,Burkina Faso,Bana,2012,7,AG1000G-BF-A +AB0213-C,https://vo_agam_output.cog.sanger.ac.uk/AB0213-C.bam,2fdbde63a863f9bfd2d81d44aad3bde9,https://vo_agam_output.cog.sanger.ac.uk/AB0213-C.vcf.gz,97eff08882d5b46a85e8c603a37a428b,https://vo_agam_output.cog.sanger.ac.uk/AB0213-C.gatk.zarr.zip,b89ca72878c8f79eac97c2a24fa78f9f,vr-pipe-vo-agam,AB0213,sequenced,True,True,ERS223756,BFSour 59.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0214-C,https://vo_agam_output.cog.sanger.ac.uk/AB0214-C.bam,0455f59a880b0154d42bd78a8d1c20e0,https://vo_agam_output.cog.sanger.ac.uk/AB0214-C.vcf.gz,73fd808977c9fdf9102474ea3b991e0d,https://vo_agam_output.cog.sanger.ac.uk/AB0214-C.gatk.zarr.zip,2c3ced4cb6ac916999ad976997054ab2,vr-pipe-vo-agam,AB0214,sequenced,True,True,ERS223866,BFSour 46.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0215-C,https://vo_agam_output.cog.sanger.ac.uk/AB0215-C.bam,d82b0153e4b88ba3ec0640334feab75a,https://vo_agam_output.cog.sanger.ac.uk/AB0215-C.vcf.gz,e0a94ffd91a2d71572a381210ae37088,https://vo_agam_output.cog.sanger.ac.uk/AB0215-C.gatk.zarr.zip,fc5a2c010e1c23702a8de6ccaaedfe0e,vr-pipe-vo-agam,AB0215,sequenced,False,True,ERS223778,BFSour 46.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0216-C,https://vo_agam_output.cog.sanger.ac.uk/AB0216-C.bam,48693199a45a5f4e0ee37350de844288,https://vo_agam_output.cog.sanger.ac.uk/AB0216-C.vcf.gz,6918eafecb54618ec6168f728829fa79,https://vo_agam_output.cog.sanger.ac.uk/AB0216-C.gatk.zarr.zip,5ffd4fb34be9f36a0d6d13391e6e9290,vr-pipe-vo-agam,AB0216,sequenced,True,True,ERS223793,BFSour 47.1,Austin Burt,Burkina 
Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0217-C,https://vo_agam_output.cog.sanger.ac.uk/AB0217-C.bam,8a997638ba392fc1ae3d8c13018a88ea,https://vo_agam_output.cog.sanger.ac.uk/AB0217-C.vcf.gz,05c9243454522f4d5a45a81b6212d02e,https://vo_agam_output.cog.sanger.ac.uk/AB0217-C.gatk.zarr.zip,792364a93ee7589deffb50370e1381f1,vr-pipe-vo-agam,AB0217,sequenced,True,True,ERS223750,BFSour 54.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0218-C,https://vo_agam_output.cog.sanger.ac.uk/AB0218-C.bam,6ca4b8983f09622cb3b12afc7ced79ec,https://vo_agam_output.cog.sanger.ac.uk/AB0218-C.vcf.gz,fe827e6cf64aef3ea83a09c71690d8cc,https://vo_agam_output.cog.sanger.ac.uk/AB0218-C.gatk.zarr.zip,666ef673894745115b38b19075c90b0f,vr-pipe-vo-agam,AB0218,sequenced,False,True,ERS223894,BFSour 54.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0219-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0219-Cx.bam,9d671f397995f41b381213d9522f687e,https://vo_agam_output.cog.sanger.ac.uk/AB0219-Cx.vcf.gz,aeccd0cc013c15efe93fd185898956d1,https://vo_agam_output.cog.sanger.ac.uk/AB0219-Cx.gatk.zarr.zip,1e8b86e246ae0444b24d0db07a4203bd,vr-pipe-vo-agam,AB0219,sequenced,False,False,ERS223910,BFSour 54.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0221-C,https://vo_agam_output.cog.sanger.ac.uk/AB0221-C.bam,8525f2d2f298b2eb337c71524d8a67a0,https://vo_agam_output.cog.sanger.ac.uk/AB0221-C.vcf.gz,38f553ce4bd92ac3977670c4cc121995,https://vo_agam_output.cog.sanger.ac.uk/AB0221-C.gatk.zarr.zip,4bc791d905b4b843460cfeda877b59ca,vr-pipe-vo-agam,AB0221,sequenced,True,True,ERS223757,BFSour 55.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A 
+AB0222-C,https://vo_agam_output.cog.sanger.ac.uk/AB0222-C.bam,1e233c2084b04112e494cd693ff3b199,https://vo_agam_output.cog.sanger.ac.uk/AB0222-C.vcf.gz,298931c7eceee0cabb43b0b7c4e61e39,https://vo_agam_output.cog.sanger.ac.uk/AB0222-C.gatk.zarr.zip,a6c6bcece7e0b99868f69f004e21a623,vr-pipe-vo-agam,AB0222,sequenced,True,True,ERS223990,BFSour 55.3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0223-C,https://vo_agam_output.cog.sanger.ac.uk/AB0223-C.bam,4eb038a0adea379c764101abb243f145,https://vo_agam_output.cog.sanger.ac.uk/AB0223-C.vcf.gz,23aca11440cb4802a06efe0160397134,https://vo_agam_output.cog.sanger.ac.uk/AB0223-C.gatk.zarr.zip,24deea2cd834056fbececc986c80e069,vr-pipe-vo-agam,AB0223,sequenced,True,True,ERS223867,BFSour 56.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0224-C,https://vo_agam_output.cog.sanger.ac.uk/AB0224-C.bam,2694915c4d2a96103ae3f82432546b2b,https://vo_agam_output.cog.sanger.ac.uk/AB0224-C.vcf.gz,2704370f2cce7eac0559c271ae65aa39,https://vo_agam_output.cog.sanger.ac.uk/AB0224-C.gatk.zarr.zip,ae99b2891ca062484830736abea6960d,vr-pipe-vo-agam,AB0224,sequenced,True,True,ERS223804,BFSour 57.1,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0225-C,https://vo_agam_output.cog.sanger.ac.uk/AB0225-C.bam,0afa76090de8a518c7966a1de83b1d22,https://vo_agam_output.cog.sanger.ac.uk/AB0225-C.vcf.gz,620eaa23e05a77f4a7bcacc1a1e088c3,https://vo_agam_output.cog.sanger.ac.uk/AB0225-C.gatk.zarr.zip,d0962e54400eed5ef215ea2a3222d221,vr-pipe-vo-agam,AB0225,sequenced,True,True,ERS223811,BFSour 57.2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0226-C,https://vo_agam_output.cog.sanger.ac.uk/AB0226-C.bam,93dd7d7299f46bb41a80dd90686e7f6c,https://vo_agam_output.cog.sanger.ac.uk/AB0226-C.vcf.gz,065d67195601228580f5b0768df854ff,https://vo_agam_output.cog.sanger.ac.uk/AB0226-C.gatk.zarr.zip,5b46951873b3edd4bf4b7929f5e5085b,vr-pipe-vo-agam,AB0226,sequenced,True,True,ERS223818,BFSour 57.3,Austin Burt,Burkina 
Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0227-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0227-Cx.bam,cdb4c0e929c76321376fb639566fda5c,https://vo_agam_output.cog.sanger.ac.uk/AB0227-Cx.vcf.gz,d3a34364a37ee6256655813f66f4b916,https://vo_agam_output.cog.sanger.ac.uk/AB0227-Cx.gatk.zarr.zip,fddb5f7ccded0ec5848839e37f574a3b,vr-pipe-vo-agam,AB0227,sequenced,False,False,ERS223934,BF11-2,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0228-C,https://vo_agam_output.cog.sanger.ac.uk/AB0228-C.bam,00a2f79378af37b7609ad2a160e31eb3,https://vo_agam_output.cog.sanger.ac.uk/AB0228-C.vcf.gz,9bb813cd73442f4d1e53ad275a32638c,https://vo_agam_output.cog.sanger.ac.uk/AB0228-C.gatk.zarr.zip,7b0750fe22f21a07333b9327da21fe1b,vr-pipe-vo-agam,AB0228,sequenced,True,True,ERS224037,BF11-3,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0229-C,https://vo_agam_output.cog.sanger.ac.uk/AB0229-C.bam,864d9784ff963438f72808b008ccc966,https://vo_agam_output.cog.sanger.ac.uk/AB0229-C.vcf.gz,29979f599777047a577d346bcef09390,https://vo_agam_output.cog.sanger.ac.uk/AB0229-C.gatk.zarr.zip,bbe01c0080ad71a56ba402e98b5c17a5,vr-pipe-vo-agam,AB0229,sequenced,True,True,ERS223951,BF11-4,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0230-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0230-Cx.bam,e1218a2f52a9db6e5fcdb14ca354041e,https://vo_agam_output.cog.sanger.ac.uk/AB0230-Cx.vcf.gz,9179feee2079ead1aa0aa804722bc1c9,https://vo_agam_output.cog.sanger.ac.uk/AB0230-Cx.gatk.zarr.zip,a2baf3171c541d2e94e81c11a71d90f5,vr-pipe-vo-agam,AB0230,sequenced,False,False,ERS224147,BF11-6,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A 
+AB0231-C,https://vo_agam_output.cog.sanger.ac.uk/AB0231-C.bam,8e0d5fecb9accb35867da6328caca03d,https://vo_agam_output.cog.sanger.ac.uk/AB0231-C.vcf.gz,d44f5dd4d788f20129e700f85274fea6,https://vo_agam_output.cog.sanger.ac.uk/AB0231-C.gatk.zarr.zip,c68e38e42d1882a50a3deb887de5d5fd,vr-pipe-vo-agam,AB0231,sequenced,True,True,ERS224102,BF11-7,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0232-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0232-Cx.bam,756f3d3af3533def6997ed55a477a897,https://vo_agam_output.cog.sanger.ac.uk/AB0232-Cx.vcf.gz,6fcd977e1906314df60edade31cb6347,https://vo_agam_output.cog.sanger.ac.uk/AB0232-Cx.gatk.zarr.zip,615d40dc7d241544e02de7c720fcbcb3,vr-pipe-vo-agam,AB0232,sequenced,False,False,ERS224085,BF11-8,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0233-C,https://vo_agam_output.cog.sanger.ac.uk/AB0233-C.bam,0c66f5460e4cd78d61fae9db759074cb,https://vo_agam_output.cog.sanger.ac.uk/AB0233-C.vcf.gz,12f03234e17a2d3c4def8087f8558424,https://vo_agam_output.cog.sanger.ac.uk/AB0233-C.gatk.zarr.zip,0b5bac912787667105060d4f788bac09,vr-pipe-vo-agam,AB0233,sequenced,True,True,ERS224118,BF11-9,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0234-C,https://vo_agam_output.cog.sanger.ac.uk/AB0234-C.bam,4b40d760a152e11568085cb6c0709bdd,https://vo_agam_output.cog.sanger.ac.uk/AB0234-C.vcf.gz,66abdd6975ae500e1b7f2e75a17608d4,https://vo_agam_output.cog.sanger.ac.uk/AB0234-C.gatk.zarr.zip,1085666c391a83091e7a7f5a2f928a5c,vr-pipe-vo-agam,AB0234,sequenced,True,True,ERS224096,BF11-10,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0235-C,https://vo_agam_output.cog.sanger.ac.uk/AB0235-C.bam,80cce06892c12e2a9fbf6875ef72af30,https://vo_agam_output.cog.sanger.ac.uk/AB0235-C.vcf.gz,d10c7e8ae610ae249ed4249b879539c0,https://vo_agam_output.cog.sanger.ac.uk/AB0235-C.gatk.zarr.zip,376baecec5a2971700f17a794024328b,vr-pipe-vo-agam,AB0235,sequenced,True,True,ERS224211,BF11-11,Austin Burt,Burkina 
Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0236-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0236-Cx.bam,da8caf8e38ead51e63337eb1069b3298,https://vo_agam_output.cog.sanger.ac.uk/AB0236-Cx.vcf.gz,ad457ce44be56eac378c861a2755ade2,https://vo_agam_output.cog.sanger.ac.uk/AB0236-Cx.gatk.zarr.zip,c66671717f665ea28be743a539bbb4a6,vr-pipe-vo-agam,AB0236,sequenced,False,False,ERS224132,BF11-12,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0237-C,https://vo_agam_output.cog.sanger.ac.uk/AB0237-C.bam,f3f5009c1cccc65ed57436323180ccca,https://vo_agam_output.cog.sanger.ac.uk/AB0237-C.vcf.gz,a70a16fa873915451f7a96798a596a81,https://vo_agam_output.cog.sanger.ac.uk/AB0237-C.gatk.zarr.zip,65b9437e3162ed510bec66376ca23288,vr-pipe-vo-agam,AB0237,sequenced,True,True,ERS224094,BF11-13,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0238-C,https://vo_agam_output.cog.sanger.ac.uk/AB0238-C.bam,cdf42cf2663157c8b61a9e2243e9d758,https://vo_agam_output.cog.sanger.ac.uk/AB0238-C.vcf.gz,2e8dd845ba110d027859821bad1302a7,https://vo_agam_output.cog.sanger.ac.uk/AB0238-C.gatk.zarr.zip,6fb0dc5975881bb164ae0e058d4359c8,vr-pipe-vo-agam,AB0238,sequenced,True,True,ERS224227,BF11-14,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0239-C,https://vo_agam_output.cog.sanger.ac.uk/AB0239-C.bam,fbd997dcf19fda5d735a7d46fe362097,https://vo_agam_output.cog.sanger.ac.uk/AB0239-C.vcf.gz,0548d2c29cff92ef8b512d63c33a8e6b,https://vo_agam_output.cog.sanger.ac.uk/AB0239-C.gatk.zarr.zip,0490a454de328c0372697328fed1360c,vr-pipe-vo-agam,AB0239,sequenced,True,True,ERS224203,BF11-15,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A 
+AB0240-C,https://vo_agam_output.cog.sanger.ac.uk/AB0240-C.bam,404de06e6efeb971e042e4b9e5973967,https://vo_agam_output.cog.sanger.ac.uk/AB0240-C.vcf.gz,e24a718d05431f50a3bfc6cd2b1bbd72,https://vo_agam_output.cog.sanger.ac.uk/AB0240-C.gatk.zarr.zip,96c6e2ca34e2a1a78e9bae29c223a516,vr-pipe-vo-agam,AB0240,sequenced,True,True,ERS224169,BF11-16,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0241-C,https://vo_agam_output.cog.sanger.ac.uk/AB0241-C.bam,adb995b5a31f7a92f81943f311aaf8e7,https://vo_agam_output.cog.sanger.ac.uk/AB0241-C.vcf.gz,23681c2856e6d6e2ee05174f80b3f25c,https://vo_agam_output.cog.sanger.ac.uk/AB0241-C.gatk.zarr.zip,3098e997803f1af4c745fbe631a4331c,vr-pipe-vo-agam,AB0241,sequenced,True,True,ERS224195,BF11-17,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0242-C,https://vo_agam_output.cog.sanger.ac.uk/AB0242-C.bam,ce4c249633ea8d7665810d7f9df61328,https://vo_agam_output.cog.sanger.ac.uk/AB0242-C.vcf.gz,81e60f648e021b7fb41dc4fcff42796b,https://vo_agam_output.cog.sanger.ac.uk/AB0242-C.gatk.zarr.zip,ad1623ca3e2fa1e84ecfdd4fc0752a96,vr-pipe-vo-agam,AB0242,sequenced,True,True,ERS224285,BF11-18,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0243-C,https://vo_agam_output.cog.sanger.ac.uk/AB0243-C.bam,5fbd3229dc62a9a15a19e013ef6563d0,https://vo_agam_output.cog.sanger.ac.uk/AB0243-C.vcf.gz,d8ef508eb8b899376085b102392de222,https://vo_agam_output.cog.sanger.ac.uk/AB0243-C.gatk.zarr.zip,0796e3199824aca81622f3368849c38c,vr-pipe-vo-agam,AB0243,sequenced,True,True,ERS224221,BF11-19,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0244-C,https://vo_agam_output.cog.sanger.ac.uk/AB0244-C.bam,babc78d5e2a030214b60ea7d53c2366b,https://vo_agam_output.cog.sanger.ac.uk/AB0244-C.vcf.gz,2348c084e8ddbd31df1148d995140d78,https://vo_agam_output.cog.sanger.ac.uk/AB0244-C.gatk.zarr.zip,77b70473c914ec821313b8a76246dbf7,vr-pipe-vo-agam,AB0244,sequenced,True,True,ERS224283,BF11-20,Austin Burt,Burkina 
Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0246-C,https://vo_agam_output.cog.sanger.ac.uk/AB0246-C.bam,6b11be53dc6b15854a238d7e163a3b15,https://vo_agam_output.cog.sanger.ac.uk/AB0246-C.vcf.gz,7ceadd2d5e0154cbd78cc332fab11015,https://vo_agam_output.cog.sanger.ac.uk/AB0246-C.gatk.zarr.zip,652166b67e4b4ac017e67ef27f17a094,vr-pipe-vo-agam,AB0246,sequenced,True,True,ERS224254,BF11-22,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0247-C,https://vo_agam_output.cog.sanger.ac.uk/AB0247-C.bam,b0882e147c599b759c99be0e805b2e50,https://vo_agam_output.cog.sanger.ac.uk/AB0247-C.vcf.gz,36df014c64e7da431453808ecae7a8cc,https://vo_agam_output.cog.sanger.ac.uk/AB0247-C.gatk.zarr.zip,f5970099d2240ad58299cf75efcc7bed,vr-pipe-vo-agam,AB0247,sequenced,True,True,ERS224110,BF11-23,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0248-C,https://vo_agam_output.cog.sanger.ac.uk/AB0248-C.bam,be3424b25af36ed50f85de33da8a024c,https://vo_agam_output.cog.sanger.ac.uk/AB0248-C.vcf.gz,cdbe236d3c8776b81999334a9fb1afb7,https://vo_agam_output.cog.sanger.ac.uk/AB0248-C.gatk.zarr.zip,f7dfb6328e33646d8559e1ae7f9149c9,vr-pipe-vo-agam,AB0248,sequenced,False,True,ERS224785,BF11-25,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0249-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0249-Cx.bam,9ae1ca3b11d455e641bdf8f0539ac3ed,https://vo_agam_output.cog.sanger.ac.uk/AB0249-Cx.vcf.gz,1b178b728cade473c580fff3a236ec4d,https://vo_agam_output.cog.sanger.ac.uk/AB0249-Cx.gatk.zarr.zip,93bc31ec69005b671e434e2d286ad2ea,vr-pipe-vo-agam,AB0249,sequenced,False,False,ERS224162,BF11-26,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A 
+AB0250-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0250-Cx.bam,619da60f5de86ebab9e36f48a2355a4f,https://vo_agam_output.cog.sanger.ac.uk/AB0250-Cx.vcf.gz,64db9e51024dc727eae953a447045cea,https://vo_agam_output.cog.sanger.ac.uk/AB0250-Cx.gatk.zarr.zip,cfed28eb078c55a991772eed8b7f9b05,vr-pipe-vo-agam,AB0250,sequenced,False,False,ERS224213,BF11-27,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0251-C,https://vo_agam_output.cog.sanger.ac.uk/AB0251-C.bam,c11b577356fcb6ce4022d6472dcab748,https://vo_agam_output.cog.sanger.ac.uk/AB0251-C.vcf.gz,a9889e0ee05077a0792428ae5bce3cbb,https://vo_agam_output.cog.sanger.ac.uk/AB0251-C.gatk.zarr.zip,d13a0a4ff0d1c19650bb6bce3a6bf563,vr-pipe-vo-agam,AB0251,sequenced,True,True,ERS224133,BF11-28,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0252-C,https://vo_agam_output.cog.sanger.ac.uk/AB0252-C.bam,b399e634d46dfd346c2280abc5165c98,https://vo_agam_output.cog.sanger.ac.uk/AB0252-C.vcf.gz,c5d567216339fa43f51292edeea8cba2,https://vo_agam_output.cog.sanger.ac.uk/AB0252-C.gatk.zarr.zip,f9115fc4ab576570b88687607bf3a748,vr-pipe-vo-agam,AB0252,sequenced,True,True,ERS224143,BF11-29,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0253-C,https://vo_agam_output.cog.sanger.ac.uk/AB0253-C.bam,b4e08d23637ffc6760c1afb920d38ec1,https://vo_agam_output.cog.sanger.ac.uk/AB0253-C.vcf.gz,54ab0f5d2ca27db3c81f6f656da190e3,https://vo_agam_output.cog.sanger.ac.uk/AB0253-C.gatk.zarr.zip,d002b4926d863bc752b4bf7bec941685,vr-pipe-vo-agam,AB0253,sequenced,True,True,ERS224149,BF11-30,Austin Burt,Burkina Faso,Souroukoudinga,2012,7,AG1000G-BF-A +AB0255-C,https://vo_agam_output.cog.sanger.ac.uk/AB0255-C.bam,7790c6454ab680c5ce75202bc2078751,https://vo_agam_output.cog.sanger.ac.uk/AB0255-C.vcf.gz,1010fe0cc27c5c875671b3bd91664494,https://vo_agam_output.cog.sanger.ac.uk/AB0255-C.gatk.zarr.zip,37d5615e59fb3a07971c2f543aca0404,vr-pipe-vo-agam,AB0255,sequenced,False,True,ERS224783,BF12-2,Austin Burt,Burkina 
Faso,Pala,2012,7,AG1000G-BF-A +AB0256-C,https://vo_agam_output.cog.sanger.ac.uk/AB0256-C.bam,3abf716c7b3baaeecf5b6e107e48355e,https://vo_agam_output.cog.sanger.ac.uk/AB0256-C.vcf.gz,a2c8c706efbf31416fa758697b6991c1,https://vo_agam_output.cog.sanger.ac.uk/AB0256-C.gatk.zarr.zip,11a7fe095df323dce99530a31e808911,vr-pipe-vo-agam,AB0256,sequenced,True,True,ERS224784,BF12-3,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0257-C,https://vo_agam_output.cog.sanger.ac.uk/AB0257-C.bam,adfed5567b50317933930343e2fa3512,https://vo_agam_output.cog.sanger.ac.uk/AB0257-C.vcf.gz,32e53082ba5df0c71073e3e8e24937e2,https://vo_agam_output.cog.sanger.ac.uk/AB0257-C.gatk.zarr.zip,52d30acdbc88bd8610a90927f6d1375c,vr-pipe-vo-agam,AB0257,sequenced,True,True,ERS224174,BF12-4,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0258-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0258-Cx.bam,bda942ed9c102c79c103dd5dce979a30,https://vo_agam_output.cog.sanger.ac.uk/AB0258-Cx.vcf.gz,f2eff710443be555d0aa123cc76e951c,https://vo_agam_output.cog.sanger.ac.uk/AB0258-Cx.gatk.zarr.zip,bfe874340000c1fe7404ea5f7d6d969e,vr-pipe-vo-agam,AB0258,sequenced,False,False,ERS224238,BF12-5,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0259-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0259-Cx.bam,bdfe7fdb92890757a029b2f7b6ca23de,https://vo_agam_output.cog.sanger.ac.uk/AB0259-Cx.vcf.gz,2e54e50ed5e113c56da9a24691d678f0,https://vo_agam_output.cog.sanger.ac.uk/AB0259-Cx.gatk.zarr.zip,c5fa814dd22fbd5f5c5eb32f4d0695a9,vr-pipe-vo-agam,AB0259,sequenced,False,False,ERS224125,BF12-6,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0260-C,https://vo_agam_output.cog.sanger.ac.uk/AB0260-C.bam,716f33f70581a38b14c8705fe5a4a711,https://vo_agam_output.cog.sanger.ac.uk/AB0260-C.vcf.gz,c580eea7b22122cf3068e68624781dca,https://vo_agam_output.cog.sanger.ac.uk/AB0260-C.gatk.zarr.zip,f541f4cce7b38e642316e9cc3ed325fa,vr-pipe-vo-agam,AB0260,sequenced,True,True,ERS224156,BF12-7,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0261-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0261-Cx.bam,efe74c5c102599344954342e49a9114f,https://vo_agam_output.cog.sanger.ac.uk/AB0261-Cx.vcf.gz,1c066b081d6117b88079619501fb6bd8,https://vo_agam_output.cog.sanger.ac.uk/AB0261-Cx.gatk.zarr.zip,4f53b5fac56303b6385b3b3dee6960d8,vr-pipe-vo-agam,AB0261,sequenced,False,False,ERS224656,BF12-8,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0262-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0262-Cx.bam,f4ca1ba30b3847b6498176e6ca3bf70c,https://vo_agam_output.cog.sanger.ac.uk/AB0262-Cx.vcf.gz,b85e88e99b32fe8f3edc7bdafd5fab43,https://vo_agam_output.cog.sanger.ac.uk/AB0262-Cx.gatk.zarr.zip,e77ccb30e710de1b4d8ca3ebdd95615d,vr-pipe-vo-agam,AB0262,sequenced,False,False,ERS224210,BF12-9,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0263-C,https://vo_agam_output.cog.sanger.ac.uk/AB0263-C.bam,d8e732f6d22d3a7a6a3fbd86fc8f0403,https://vo_agam_output.cog.sanger.ac.uk/AB0263-C.vcf.gz,fb0181761cbfea52d476201d1d386b76,https://vo_agam_output.cog.sanger.ac.uk/AB0263-C.gatk.zarr.zip,edcfc97849ff02b7f139739b03aa53df,vr-pipe-vo-agam,AB0263,sequenced,True,True,ERS224223,BF12-10,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0264-C,https://vo_agam_output.cog.sanger.ac.uk/AB0264-C.bam,0eb809a4e01f86f86b6f64f5c2ded013,https://vo_agam_output.cog.sanger.ac.uk/AB0264-C.vcf.gz,585859439fb849f259ab7d826793478f,https://vo_agam_output.cog.sanger.ac.uk/AB0264-C.gatk.zarr.zip,88c632bc3a203743b8497594bb2faa70,vr-pipe-vo-agam,AB0264,sequenced,True,True,ERS224114,BF12-13,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0265-C,https://vo_agam_output.cog.sanger.ac.uk/AB0265-C.bam,3c6fe890ed306d83a78e4dc4cfa48a7e,https://vo_agam_output.cog.sanger.ac.uk/AB0265-C.vcf.gz,b86f5de57f161a2c8a95939329670139,https://vo_agam_output.cog.sanger.ac.uk/AB0265-C.gatk.zarr.zip,0f066d06ad0c4671f100bc3a76d91b92,vr-pipe-vo-agam,AB0265,sequenced,True,True,ERS224095,BF12-14,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0266-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0266-Cx.bam,f4332b2b7e03883c137919ea7d4f4cce,https://vo_agam_output.cog.sanger.ac.uk/AB0266-Cx.vcf.gz,fd129a3221df210840d1a159b6608a21,https://vo_agam_output.cog.sanger.ac.uk/AB0266-Cx.gatk.zarr.zip,a7bd264cbef0013862e2cfbb94656556,vr-pipe-vo-agam,AB0266,sequenced,False,False,ERS224782,BF12-15,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0267-C,https://vo_agam_output.cog.sanger.ac.uk/AB0267-C.bam,b89682b414721f3f028d43f8a3c10650,https://vo_agam_output.cog.sanger.ac.uk/AB0267-C.vcf.gz,99223b7fd1e9d83a35645252fe943cdc,https://vo_agam_output.cog.sanger.ac.uk/AB0267-C.gatk.zarr.zip,49880621849ecd16335de01eb12dd066,vr-pipe-vo-agam,AB0267,sequenced,True,True,ERS224228,BF12-16,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0268-C,https://vo_agam_output.cog.sanger.ac.uk/AB0268-C.bam,e77950d4f74ab9ac78b2fc91db594c22,https://vo_agam_output.cog.sanger.ac.uk/AB0268-C.vcf.gz,cd84b85d181259fd2d49e455238a6efa,https://vo_agam_output.cog.sanger.ac.uk/AB0268-C.gatk.zarr.zip,c0f1b256df110c645575af7270804086,vr-pipe-vo-agam,AB0268,sequenced,True,True,ERS224295,BF12-17,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0269-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0269-Cx.bam,93f6cdc1caaa972a79ba1eee24deca14,https://vo_agam_output.cog.sanger.ac.uk/AB0269-Cx.vcf.gz,584d63cb21417088b000b1ff2844d5ab,https://vo_agam_output.cog.sanger.ac.uk/AB0269-Cx.gatk.zarr.zip,129e3d872ef3434ad090ead22bb1fe8c,vr-pipe-vo-agam,AB0269,sequenced,False,False,ERS224100,BF12-18,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0270-C,https://vo_agam_output.cog.sanger.ac.uk/AB0270-C.bam,369381ccef2e4f0515846fdad66e785e,https://vo_agam_output.cog.sanger.ac.uk/AB0270-C.vcf.gz,0c19842737faa5f535e39190523cd7d0,https://vo_agam_output.cog.sanger.ac.uk/AB0270-C.gatk.zarr.zip,7fcb36fc97b6c6f7f42bd103e403d14e,vr-pipe-vo-agam,AB0270,sequenced,True,True,ERS224101,BF12-20,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0271-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0271-Cx.bam,38aa285554e86559d2d9d4f8843d3c71,https://vo_agam_output.cog.sanger.ac.uk/AB0271-Cx.vcf.gz,18e4e96b0bec0c8f0ee5080268665101,https://vo_agam_output.cog.sanger.ac.uk/AB0271-Cx.gatk.zarr.zip,9eb7fb2b60a4a9ce08bcc8e345638055,vr-pipe-vo-agam,AB0271,sequenced,False,False,ERS224148,BF12-22,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0272-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0272-Cx.bam,924d676d8384b8c95789210bb9c3e887,https://vo_agam_output.cog.sanger.ac.uk/AB0272-Cx.vcf.gz,443f3050a52fc30eeda5a779b853c38d,https://vo_agam_output.cog.sanger.ac.uk/AB0272-Cx.gatk.zarr.zip,b916e776d2a2ead80d0b690440cff6e9,vr-pipe-vo-agam,AB0272,sequenced,False,False,ERS224247,BF12-23,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0273-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0273-Cx.bam,2d1a16037cb0244675769043feaad4e7,https://vo_agam_output.cog.sanger.ac.uk/AB0273-Cx.vcf.gz,7a0ca1fa9ceac97a6bf6f65ce88fba73,https://vo_agam_output.cog.sanger.ac.uk/AB0273-Cx.gatk.zarr.zip,7db87c31d8a92b15542e2f1b7ede7154,vr-pipe-vo-agam,AB0273,sequenced,False,False,ERS224154,BF12-24,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0274-C,https://vo_agam_output.cog.sanger.ac.uk/AB0274-C.bam,c22656431d9b0cb930320115889a5604,https://vo_agam_output.cog.sanger.ac.uk/AB0274-C.vcf.gz,8bba18e6c695bb2ba928f9206b2003f4,https://vo_agam_output.cog.sanger.ac.uk/AB0274-C.gatk.zarr.zip,cbb2e04f12c71684cd191e63ea3a2b6e,vr-pipe-vo-agam,AB0274,sequenced,True,True,ERS224294,BF12-25,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0275-C,https://vo_agam_output.cog.sanger.ac.uk/AB0275-C.bam,3fdf5545b3f90c6df9078ea76ac0ca3d,https://vo_agam_output.cog.sanger.ac.uk/AB0275-C.vcf.gz,6d565ec166da5c74439601765417d7e3,https://vo_agam_output.cog.sanger.ac.uk/AB0275-C.gatk.zarr.zip,765230d6bd7575456b6a949613c6dcc8,vr-pipe-vo-agam,AB0275,sequenced,False,True,ERS224781,BF12-26,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0276-C,https://vo_agam_output.cog.sanger.ac.uk/AB0276-C.bam,3e51d5baf3cbbfbe6907bcc04720fd3a,https://vo_agam_output.cog.sanger.ac.uk/AB0276-C.vcf.gz,5a15c136be9682318702967bf1232c7a,https://vo_agam_output.cog.sanger.ac.uk/AB0276-C.gatk.zarr.zip,ef58e8e9077fc5cc44072e408cfee392,vr-pipe-vo-agam,AB0276,sequenced,True,True,ERS224319,BF12-27,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0277-C,https://vo_agam_output.cog.sanger.ac.uk/AB0277-C.bam,8084ad1543f7c5f5f92d7b361dc92b55,https://vo_agam_output.cog.sanger.ac.uk/AB0277-C.vcf.gz,e08cfd5c157aa96aa48bb7e01ab3e1dd,https://vo_agam_output.cog.sanger.ac.uk/AB0277-C.gatk.zarr.zip,49942ec709d1762590dd55962bc1442a,vr-pipe-vo-agam,AB0277,sequenced,True,True,ERS224151,BF12-28,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0278-C,https://vo_agam_output.cog.sanger.ac.uk/AB0278-C.bam,240ff9a12f78d777b0e91812b42a51c1,https://vo_agam_output.cog.sanger.ac.uk/AB0278-C.vcf.gz,f79f1b0f009c7eb20896112a9a95e610,https://vo_agam_output.cog.sanger.ac.uk/AB0278-C.gatk.zarr.zip,273f2b75fc4dbd06e6fdf181ca29759e,vr-pipe-vo-agam,AB0278,sequenced,True,True,ERS224165,BF12-29,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0279-C,https://vo_agam_output.cog.sanger.ac.uk/AB0279-C.bam,dbb539bb1cc01def99a74abf16cb99e6,https://vo_agam_output.cog.sanger.ac.uk/AB0279-C.vcf.gz,950bea0a938f98e95fbd96017508dfb3,https://vo_agam_output.cog.sanger.ac.uk/AB0279-C.gatk.zarr.zip,3f8af418ae748cbf54451fa5132c6fe7,vr-pipe-vo-agam,AB0279,sequenced,True,True,ERS224190,BF12-30,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0280-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0280-Cx.bam,da00622486d8366d6bfe25bb11604087,https://vo_agam_output.cog.sanger.ac.uk/AB0280-Cx.vcf.gz,ed0ae495ca1266790bb0ed03772b89d6,https://vo_agam_output.cog.sanger.ac.uk/AB0280-Cx.gatk.zarr.zip,12859984d3903f08f871b6f102219d4e,vr-pipe-vo-agam,AB0280,sequenced,False,False,ERS224090,BF12-31,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A 
+AB0281-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0281-Cx.bam,e88856870f4db349d55232ae1e9947fd,https://vo_agam_output.cog.sanger.ac.uk/AB0281-Cx.vcf.gz,a563724f9b19840295916ad01bf11eab,https://vo_agam_output.cog.sanger.ac.uk/AB0281-Cx.gatk.zarr.zip,055486445f1bf518f2645d1c5569dcda,vr-pipe-vo-agam,AB0281,sequenced,False,False,ERS224670,BF12-32,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0282-Cx,https://vo_agam_output.cog.sanger.ac.uk/AB0282-Cx.bam,821f137ab5b07a0ef576c2bea73a69c1,https://vo_agam_output.cog.sanger.ac.uk/AB0282-Cx.vcf.gz,2edc69c6c2a7ce27101de2d5d4445813,https://vo_agam_output.cog.sanger.ac.uk/AB0282-Cx.gatk.zarr.zip,f33d849518e5ac257f61382174611888,vr-pipe-vo-agam,AB0282,sequenced,False,False,ERS224181,BF12-33,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0283-C,https://vo_agam_output.cog.sanger.ac.uk/AB0283-C.bam,a21dabcf2b4adfdd4f37cb804ae892a4,https://vo_agam_output.cog.sanger.ac.uk/AB0283-C.vcf.gz,d59ad451b4f2f56f75048bb684491d25,https://vo_agam_output.cog.sanger.ac.uk/AB0283-C.gatk.zarr.zip,d4c924ae1d45d79074a69e0239e530be,vr-pipe-vo-agam,AB0283,sequenced,True,True,ERS224248,BF10-12,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A +AB0284-C,https://vo_agam_output.cog.sanger.ac.uk/AB0284-C.bam,e103730d6f0260d922d84210868b1be7,https://vo_agam_output.cog.sanger.ac.uk/AB0284-C.vcf.gz,bd10104dc1cc580a59bb6de028515949,https://vo_agam_output.cog.sanger.ac.uk/AB0284-C.gatk.zarr.zip,a3361cfa2dda609029134b3cf80d03a4,vr-pipe-vo-agam,AB0284,sequenced,True,True,ERS224089,BF10-13,Austin Burt,Burkina Faso,Pala,2012,7,AG1000G-BF-A diff --git a/tests/anoph/fixture/missing_metadata/v3/metadata/species_calls_aim_20220528/AG1000G-AO/samples.species_aim.csv b/tests/anoph/fixture/missing_metadata/v3/metadata/species_calls_aim_20220528/AG1000G-AO/samples.species_aim.csv new file mode 100644 index 000000000..94d8569fd --- /dev/null +++ b/tests/anoph/fixture/missing_metadata/v3/metadata/species_calls_aim_20220528/AG1000G-AO/samples.species_aim.csv @@ 
-0,0 +1,82 @@ +sample_id,aim_species_fraction_arab,aim_species_fraction_colu,aim_species_fraction_colu_no2L,aim_species_gambcolu_arabiensis,aim_species_gambiae_coluzzii,aim_species +AR0047-C,0.000957670944263551,0.8567335243553008,0.9455445544554455,gambcolu,coluzzii,coluzzii +AR0049-C,0.0007665772326561902,0.8219669777458722,0.9354838709677419,gambcolu,coluzzii,coluzzii +AR0051-C,0.0007658433850277618,0.8258992805755395,0.9386401326699834,gambcolu,coluzzii,coluzzii +AR0061-C,0.0017241379310344827,0.8271872740419378,0.940099833610649,gambcolu,coluzzii,coluzzii +AR0078-C,0.0005744925315970893,0.8162239770279971,0.9288668320926385,gambcolu,coluzzii,coluzzii +AR0080-C,0.0005747126436781609,0.8845050215208035,0.9388429752066115,gambcolu,coluzzii,coluzzii +AR0084-C,0.001915341888527102,0.9481268011527377,0.946843853820598,gambcolu,coluzzii,coluzzii +AR0097-C,0.0005743825387708214,0.8356066044508256,0.9363110008271298,gambcolu,coluzzii,coluzzii +AR0072-C,0.0009584052137243627,0.8320172290021536,0.9437551695616212,gambcolu,coluzzii,coluzzii +AR0094-C,0.0013412531136232995,0.832258064516129,0.9463253509496284,gambcolu,coluzzii,coluzzii +AR0095-C,0.0013402259237985832,0.8138528138528138,0.9259567387687188,gambcolu,coluzzii,coluzzii +AR0083-C,0.000957670944263551,0.827933765298776,0.9427385892116182,gambcolu,coluzzii,coluzzii +AR0093-C,0.0007656967840735069,0.8299856527977044,0.9454545454545454,gambcolu,coluzzii,coluzzii +AR0021-C,0.0015325670498084292,0.8265232974910395,0.9388934764657308,gambcolu,coluzzii,coluzzii +AR0082-C,0.0011494252873563218,0.8916786226685797,0.947107438016529,gambcolu,coluzzii,coluzzii +AR0008-C,0.0005743825387708214,0.8305814788226848,0.9445822994210091,gambcolu,coluzzii,coluzzii +AR0085-C,0.00019142419601837673,0.8829022988505747,0.9370860927152318,gambcolu,coluzzii,coluzzii +AR0098-C,0.0011496455259628281,0.826867816091954,0.9403973509933775,gambcolu,coluzzii,coluzzii 
+AR0092-C,0.0007668711656441718,0.8226848528356066,0.9354838709677419,gambcolu,coluzzii,coluzzii +AR0017-C,0.0007661367554108408,0.8303378864126527,0.9453189726594863,gambcolu,coluzzii,coluzzii +AR0015-C,0.0005747126436781609,0.8215827338129497,0.9336650082918739,gambcolu,coluzzii,coluzzii +AR0019-C,0.0005753739930955121,0.8256814921090387,0.9388429752066115,gambcolu,coluzzii,coluzzii +AR0100-C,0.0009584052137243627,0.8830703012912482,0.9371900826446281,gambcolu,coluzzii,coluzzii +AR0034-C,0.0011485451761102604,0.888569374550683,0.9420049710024855,gambcolu,coluzzii,coluzzii +AR0086-C,0.0007667241709794901,0.8098995695839312,0.9206611570247933,gambcolu,coluzzii,coluzzii +AR0057-C,0.00038299502106472615,0.8226387887527037,0.9334995843724023,gambcolu,coluzzii,coluzzii +AR0076-C,0.0013415101571483327,0.8858578607322326,0.9412737799834574,gambcolu,coluzzii,coluzzii +AR0042-C,0.0009582215408202376,0.8312993539124193,0.9454094292803971,gambcolu,coluzzii,coluzzii +AR0063-C,0.0013407393219689715,0.822174226061915,0.9319502074688797,gambcolu,coluzzii,coluzzii +AR0012-C,0.0001915341888527102,0.8922413793103449,0.9478476821192053,gambcolu,coluzzii,coluzzii +AR0087-C,0.0013415101571483327,0.890961262553802,0.9454545454545454,gambcolu,coluzzii,coluzzii +AR0065-C,0.0007659900421294523,0.8695340501792115,0.9430222956234517,gambcolu,coluzzii,coluzzii +AR0038-C,0.0011496455259628281,0.8820143884892087,0.9378109452736318,gambcolu,coluzzii,coluzzii +AR0089-C,0.001915341888527102,0.818705035971223,0.9311774461028193,gambcolu,coluzzii,coluzzii +AR0071-C,0.0013417672992141077,0.8840892728581713,0.9402489626556016,gambcolu,coluzzii,coluzzii +AR0096-C,0.0005746025665581306,0.8200716845878137,0.9339388934764657,gambcolu,coluzzii,coluzzii +AR0088-C,0.0013407393219689715,0.8820143884892087,0.9361525704809287,gambcolu,coluzzii,coluzzii +AR0066-C,0.0005750431282346176,0.8274622573687994,0.9411764705882353,gambcolu,coluzzii,coluzzii 
+AR0023-C,0.00038299502106472615,0.8127690100430416,0.9239669421487603,gambcolu,coluzzii,coluzzii +AR0020-C,0.0017247987734764277,0.8312993539124193,0.9454094292803971,gambcolu,coluzzii,coluzzii +AR0024-C,0.0005747126436781609,0.9482386772106398,0.9461474730737366,gambcolu,coluzzii,coluzzii +AR0014-C,0.0009574875526618154,0.8147882268485284,0.9272125723738627,gambcolu,coluzzii,coluzzii +AR0079-C,0.0005746025665581306,0.8273381294964028,0.9411276948590381,gambcolu,coluzzii,coluzzii +AR0027-C,0.0003832151753209427,0.890961262553802,0.9454995871180842,gambcolu,coluzzii,coluzzii +AR0075-C,0.0003832151753209427,0.8848920863309353,0.939469320066335,gambcolu,coluzzii,coluzzii +AR0077-C,0.00038284839203675346,0.9549033643521833,0.9513602638087386,gambcolu,coluzzii,coluzzii +AR0007-C,0.0005743825387708214,0.8284278535534817,0.9421009098428453,gambcolu,coluzzii,coluzzii +AR0062-C,0.00038336208548974505,0.875,0.9279801324503312,gambcolu,coluzzii,coluzzii +AR0060-C,0.00019149751053236308,0.8261494252873564,0.9395695364238411,gambcolu,coluzzii,coluzzii +AR0022-C,0.0003831417624521073,0.8250539956803455,0.9394190871369295,gambcolu,coluzzii,coluzzii +AR0002-C,0.0003832151753209427,0.8868194842406877,0.9414191419141914,gambcolu,coluzzii,coluzzii +AR0059-C,0.0005744925315970893,0.8211206896551724,0.9346026490066225,gambcolu,coluzzii,coluzzii +AR0048-C,0.0005743825387708214,0.8255563531945441,0.9379652605459057,gambcolu,coluzzii,coluzzii +AR0011-C,0.0005743825387708214,0.8248384781048098,0.9387923904052936,gambcolu,coluzzii,coluzzii +AR0009-C,0.00038284839203675346,0.8621933621933622,0.9534109816971714,gambcolu,coluzzii,coluzzii +AR0043-C,0.00019149751053236308,0.8204577968526466,0.9341021416803954,gambcolu,coluzzii,coluzzii +AR0035-C,0.0005742725880551302,0.8259312320916905,0.9381188118811881,gambcolu,coluzzii,coluzzii +AR0074-C,0.001532860701283771,0.8879310344827587,0.9437086092715232,gambcolu,coluzzii,coluzzii 
+AR0045-C,0.0017244682889442423,0.8200716845878137,0.9322873658133773,gambcolu,coluzzii,coluzzii +AR0073-C,0.0009578544061302681,0.8229390681003584,0.935590421139554,gambcolu,coluzzii,coluzzii +AR0004-C,0.002491376006132618,0.8211968276856525,0.9343308395677473,gambcolu,coluzzii,coluzzii +AR0040-C,0.0007665772326561902,0.8268398268398268,0.9417637271214643,gambcolu,coluzzii,coluzzii +AR0052-C,0.0009571209800918836,0.8274874731567645,0.9414674361088211,gambcolu,coluzzii,coluzzii +AR0064-C,0.0009574875526618154,0.8223021582733813,0.9361525704809287,gambcolu,coluzzii,coluzzii +AR0044-C,0.0007664303506418854,0.8249279538904899,0.9368770764119602,gambcolu,coluzzii,coluzzii +AR0036-C,0.0011498658489842851,0.8281811646297628,0.9420049710024855,gambcolu,coluzzii,coluzzii +AR0001-C,0.0007659900421294523,0.8887293610911702,0.946236559139785,gambcolu,coluzzii,coluzzii +AR0006-C,0.0013412531136232995,0.823021582733813,0.9369817578772802,gambcolu,coluzzii,coluzzii +AR0046-C,0.0015325670498084292,0.8151079136690648,0.927860696517413,gambcolu,coluzzii,coluzzii +AR0070-Cx,0.0007665772326561902,0.8147882268485284,0.9288668320926385,gambcolu,coluzzii,coluzzii +AR0010-Cx,0.0005742725880551302,0.8249641319942611,0.9371900826446281,gambcolu,coluzzii,coluzzii +AR0090-Cx,0.0011492051331162612,0.8248384781048098,0.9379652605459057,gambcolu,coluzzii,coluzzii +AR0054-Cx,0.0005746025665581306,0.8291457286432161,0.9437551695616212,gambcolu,coluzzii,coluzzii +AR0016-Cx,0.0005742725880551302,0.8291457286432161,0.9437551695616212,gambcolu,coluzzii,coluzzii +AR0050-Cx,0.0007667241709794901,0.8223495702005731,0.9348184818481848,gambcolu,coluzzii,coluzzii +AR0069-Cx,0.0009574875526618154,0.8223021582733813,0.9369817578772802,gambcolu,coluzzii,coluzzii +AR0018-Cx,0.0005743825387708214,0.827116212338594,0.9413223140495868,gambcolu,coluzzii,coluzzii +AR0081-Cx,0.0007662835249042146,0.818116462976276,0.9304059652029826,gambcolu,coluzzii,coluzzii 
+AR0013-Cx,0.0005750431282346176,0.8935251798561151,0.9477611940298507,gambcolu,coluzzii,coluzzii +AR0026-C,0.0017257909875359539,0.8887293610911702,0.9437551695616212,gambcolu,coluzzii,coluzzii +AR0053-C,0.0003836562440053712,0.9157667386609071,0.9452282157676348,gambcolu,coluzzii,coluzzii diff --git a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.admin_units.csv b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.admin_units.csv deleted file mode 100644 index 8943aab61..000000000 --- a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.admin_units.csv +++ /dev/null @@ -1,648 +0,0 @@ -sample_id,country,country_ISO,adm1_name,adm1_ISO,adm2_name -VBS00256-4651STDY7017184,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00257-4651STDY7017185,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00259-4651STDY7017186,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00262-4651STDY7017187,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00277-4651STDY7017189,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00288-4651STDY7017191,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00289-4651STDY7017192,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00293-4651STDY7017193,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00309-4651STDY7017194,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00331-4651STDY7017196,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00343-4651STDY7017197,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00344-4651STDY7017198,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00345-4651STDY7017199,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00350-4651STDY7017200,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00351-4651STDY7017201,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00352-4651STDY7017202,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00353-4651STDY7017203,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00354-4651STDY7017204,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00355-4651STDY7017205,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00356-4651STDY7017206,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS00358-4651STDY7017207,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00359-4651STDY7017208,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00361-4651STDY7017209,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00364-4651STDY7017211,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00365-4651STDY7017212,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00366-4651STDY7017213,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00367-4651STDY7017214,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00368-4651STDY7017215,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00370-4651STDY7017216,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00371-4651STDY7017217,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00373-4651STDY7017218,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00375-4651STDY7017220,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00388-4651STDY7017222,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00449-4651STDY7017223,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00912-4651STDY7017225,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00913-4651STDY7017226,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00916-4651STDY7017227,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00917-4651STDY7017228,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00918-4651STDY7017229,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00919-4651STDY7017230,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00920-4651STDY7017231,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00922-4651STDY7017232,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00925-4651STDY7017233,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00926-4651STDY7017234,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00927-4651STDY7017235,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00928-4651STDY7017236,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00929-4651STDY7017237,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00931-4651STDY7017238,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00932-4651STDY7017239,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00933-4651STDY7017240,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00935-4651STDY7017241,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00937-4651STDY7017243,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00943-4651STDY7017245,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS00944-4651STDY7017246,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00945-4651STDY7017247,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00949-4651STDY7017248,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00950-4651STDY7017249,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00951-4651STDY7017250,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00952-4651STDY7017251,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00953-4651STDY7017252,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00957-4651STDY7017253,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00958-4651STDY7017254,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00960-4651STDY7017255,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00961-4651STDY7017256,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00962-4651STDY7017257,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00964-4651STDY7017258,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00969-4651STDY7017259,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00970-4651STDY7017260,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00971-4651STDY7017261,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00972-4651STDY7017262,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00974-4651STDY7017263,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00975-4651STDY7017264,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00976-4651STDY7017265,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00977-4651STDY7017266,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00982-4651STDY7017267,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00983-4651STDY7017268,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00985-4651STDY7017269,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00986-4651STDY7017270,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00989-4651STDY7017271,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00991-4651STDY7017272,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00992-4651STDY7017273,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00994-4651STDY7017274,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00997-4651STDY7017275,Mali,MLI,Koulikouro,ML-2,Banamba -VBS00999-4651STDY7017278,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01000-4651STDY7017279,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01001-4651STDY7017280,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01002-4651STDY7017281,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01004-4651STDY7017282,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01005-4651STDY7017283,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01006-4651STDY7017284,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01007-4651STDY7017285,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01008-4651STDY7017286,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01009-4651STDY7017287,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01012-4651STDY7017288,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01013-4651STDY7017289,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01014-4651STDY7017290,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01015-4651STDY7017291,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01016-4651STDY7017292,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01018-4651STDY7017293,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01026-4651STDY7017295,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01029-4651STDY7017296,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01030-4651STDY7017297,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01031-4651STDY7017298,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01037-4651STDY7017299,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01044-4651STDY7017301,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01045-4651STDY7017302,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01046-4651STDY7017303,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01048-4651STDY7017304,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01049-4651STDY7017305,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01050-4651STDY7017306,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01052-4651STDY7017307,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01053-4651STDY7017308,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01054-4651STDY7017309,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01055-4651STDY7017310,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01056-4651STDY7017311,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01057-4651STDY7017312,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01059-4651STDY7017313,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01061-4651STDY7017314,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01063-4651STDY7017315,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01064-4651STDY7017316,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01069-4651STDY7017317,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01070-4651STDY7017318,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01071-4651STDY7017319,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01072-4651STDY7017320,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01073-4651STDY7017321,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01074-4651STDY7017322,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01075-4651STDY7017323,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01078-4651STDY7017324,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01079-4651STDY7017325,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01080-4651STDY7017326,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01081-4651STDY7017327,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01082-4651STDY7017328,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01085-4651STDY7017329,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01086-4651STDY7017330,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01087-4651STDY7017331,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01089-4651STDY7017332,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01093-4651STDY7017333,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01107-4651STDY7017335,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01108-4651STDY7017336,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01109-4651STDY7017337,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01111-4651STDY7017338,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01112-4651STDY7017339,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01115-4651STDY7017340,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01123-4651STDY7017342,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01124-4651STDY7017343,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01125-4651STDY7017344,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01126-4651STDY7017345,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01129-4651STDY7017346,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01139-4651STDY7017350,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01140-4651STDY7017351,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01141-4651STDY7017352,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01142-4651STDY7017353,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01144-4651STDY7017354,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01149-4651STDY7017355,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01150-4651STDY7017356,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01152-4651STDY7017357,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01158-4651STDY7017358,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01161-4651STDY7017359,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01162-4651STDY7017360,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01163-4651STDY7017361,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01164-4651STDY7017362,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01166-4651STDY7017364,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01170-4651STDY7017365,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01171-4651STDY7017366,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01173-4651STDY7017367,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01174-4651STDY7017368,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01175-4651STDY7017369,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01178-4651STDY7017370,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01179-4651STDY7017371,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01180-4651STDY7017374,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01182-4651STDY7017375,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01184-4651STDY7017376,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01187-4651STDY7017377,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01189-4651STDY7017379,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01191-4651STDY7017380,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01194-4651STDY7017381,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01195-4651STDY7017382,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01197-4651STDY7017383,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01199-4651STDY7017384,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01200-4651STDY7017385,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01201-4651STDY7017386,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01202-4651STDY7017387,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01203-4651STDY7017388,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01204-4651STDY7017389,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01205-4651STDY7017390,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01207-4651STDY7017392,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01208-4651STDY7017393,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01209-4651STDY7017394,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01210-4651STDY7017395,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01211-4651STDY7017396,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01212-4651STDY7017397,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01213-4651STDY7017398,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01216-4651STDY7017400,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01218-4651STDY7017401,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01219-4651STDY7017402,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01222-4651STDY7017404,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01223-4651STDY7017405,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01224-4651STDY7017406,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01226-4651STDY7017407,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01227-4651STDY7017408,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01228-4651STDY7017409,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01229-4651STDY7017410,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01230-4651STDY7017411,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01231-4651STDY7017412,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01232-4651STDY7017413,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01233-4651STDY7017414,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01234-4651STDY7017415,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01235-4651STDY7017416,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01236-4651STDY7017417,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01237-4651STDY7017418,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01238-4651STDY7017419,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01239-4651STDY7017420,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01240-4651STDY7017421,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01241-4651STDY7017422,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01242-4651STDY7017423,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01244-4651STDY7017424,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01245-4651STDY7017425,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01246-4651STDY7017426,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01247-4651STDY7017427,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01248-4651STDY7017428,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01250-4651STDY7017429,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01251-4651STDY7017430,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01252-4651STDY7017431,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01253-4651STDY7017432,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01254-4651STDY7017433,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01256-4651STDY7017434,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01257-4651STDY7017435,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01258-4651STDY7017436,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01259-4651STDY7017437,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01261-4651STDY7017438,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01262-4651STDY7017439,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01265-4651STDY7017440,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01266-4651STDY7017441,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01267-4651STDY7017442,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01268-4651STDY7017443,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01269-4651STDY7017444,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01271-4651STDY7017445,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01272-4651STDY7017446,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01273-4651STDY7017447,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01274-4651STDY7017448,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01276-4651STDY7017450,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01277-4651STDY7017451,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01278-4651STDY7017452,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01279-4651STDY7017453,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01280-4651STDY7017454,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01291-4651STDY7017455,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01296-4651STDY7017456,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01297-4651STDY7017457,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01303-4651STDY7017458,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01304-4651STDY7017459,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01314-4651STDY7017460,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01315-4651STDY7017461,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01316-4651STDY7017462,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01317-4651STDY7017463,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01320-4651STDY7017464,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01322-4651STDY7017465,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01323-4651STDY7017466,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01328-4651STDY7017467,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01329-4651STDY7017470,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01330-4651STDY7017471,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01336-4651STDY7017473,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01338-4651STDY7017474,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01340-4651STDY7017475,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01341-4651STDY7017476,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01344-4651STDY7017477,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01345-4651STDY7017478,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01346-4651STDY7017479,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01347-4651STDY7017480,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01348-4651STDY7017481,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01350-4651STDY7017482,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01351-4651STDY7017483,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01352-4651STDY7017484,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01353-4651STDY7017485,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01354-4651STDY7017486,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01356-4651STDY7017487,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01358-4651STDY7017488,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01361-4651STDY7017489,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01362-4651STDY7017490,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01363-4651STDY7017491,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01365-4651STDY7017492,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01366-4651STDY7017493,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01367-4651STDY7017494,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01368-4651STDY7017495,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01369-4651STDY7017496,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01370-4651STDY7017497,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01372-4651STDY7017498,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01382-4651STDY7017499,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01389-4651STDY7017501,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01390-4651STDY7017502,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01391-4651STDY7017503,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01392-4651STDY7017504,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01397-4651STDY7017505,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01398-4651STDY7017506,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01399-4651STDY7017507,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01400-4651STDY7017508,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01401-4651STDY7017509,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01405-4651STDY7017510,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01406-4651STDY7017511,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01407-4651STDY7017512,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01408-4651STDY7017513,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01412-4651STDY7017514,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01413-4651STDY7017515,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01414-4651STDY7017516,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01415-4651STDY7017517,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01416-4651STDY7017518,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01418-4651STDY7017519,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01420-4651STDY7017520,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01421-4651STDY7017521,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01428-4651STDY7017522,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01430-4651STDY7017523,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01432-4651STDY7017525,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01433-4651STDY7017526,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01434-4651STDY7017527,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01435-4651STDY7017528,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01436-4651STDY7017529,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01438-4651STDY7017530,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01439-4651STDY7017531,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01442-4651STDY7017532,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01444-4651STDY7017533,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01445-4651STDY7017534,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01446-4651STDY7017535,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01448-4651STDY7017537,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01451-4651STDY7017538,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01452-4651STDY7017539,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01454-4651STDY7017540,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01456-4651STDY7017541,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01457-4651STDY7017542,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01462-4651STDY7017544,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01463-4651STDY7017545,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01464-4651STDY7017546,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01465-4651STDY7017547,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01466-4651STDY7017548,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01467-4651STDY7017549,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01470-4651STDY7017550,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01472-4651STDY7017551,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01473-4651STDY7017552,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01475-4651STDY7017553,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01476-4651STDY7017554,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01477-4651STDY7017555,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01478-4651STDY7017556,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01484-4651STDY7017557,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01492-4651STDY7017558,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01503-4651STDY7017559,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01506-4651STDY7017560,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01508-4651STDY7017561,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01509-4651STDY7017562,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01510-4651STDY7017563,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01511-4651STDY7017566,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01513-4651STDY7017567,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01516-4651STDY7017568,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01517-4651STDY7017569,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01518-4651STDY7017570,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01519-4651STDY7017571,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01520-4651STDY7017572,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01521-4651STDY7017573,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01522-4651STDY7017574,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01524-4651STDY7017575,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01525-4651STDY7017576,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01526-4651STDY7017577,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01527-4651STDY7017578,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01528-4651STDY7017579,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01530-4651STDY7017580,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01532-4651STDY7017581,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01533-4651STDY7017582,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01534-4651STDY7017583,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01535-4651STDY7017584,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01536-4651STDY7017585,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01537-4651STDY7017586,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01538-4651STDY7017587,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01540-4651STDY7017588,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01541-4651STDY7017589,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01542-4651STDY7017590,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01543-4651STDY7017591,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01544-4651STDY7017592,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01546-4651STDY7017593,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01548-4651STDY7017594,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01549-4651STDY7017595,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01551-4651STDY7017596,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01552-4651STDY7017597,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01553-4651STDY7017598,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01554-4651STDY7017599,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01556-4651STDY7017600,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01557-4651STDY7017601,Mali,MLI,Koulikouro,ML-2,Banamba 
-VBS01558-4651STDY7017602,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01562-4651STDY7017603,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01564-4651STDY7017604,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01565-4651STDY7017605,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01566-4651STDY7017606,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01569-4651STDY7017607,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01570-4651STDY7017608,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01571-4651STDY7017609,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01572-4651STDY7017610,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01574-4651STDY7017611,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01575-4651STDY7017612,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01576-4651STDY7017613,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01578-4651STDY7017614,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01579-4651STDY7017615,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01580-4651STDY7017616,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01583-4651STDY7017617,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01584-4651STDY7017618,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01586-4651STDY7017619,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01587-4651STDY7017620,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01590-4651STDY7017621,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01591-4651STDY7017622,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01592-4651STDY7017623,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01594-4651STDY7017624,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01595-4651STDY7017625,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01596-4651STDY7017626,Mali,MLI,Koulikouro,ML-2,Banamba -VBS01606-4651STDY7017629,Mali,MLI,Segou,ML-4,Niono -VBS01608-4651STDY7017630,Mali,MLI,Segou,ML-4,Niono -VBS01610-4651STDY7017631,Mali,MLI,Segou,ML-4,Niono -VBS01615-4651STDY7017632,Mali,MLI,Segou,ML-4,Niono -VBS01631-4651STDY7017634,Mali,MLI,Segou,ML-4,Niono -VBS01640-4651STDY7017637,Mali,MLI,Segou,ML-4,Niono -VBS01647-4651STDY7017640,Mali,MLI,Segou,ML-4,Niono -VBS01658-4651STDY7017643,Mali,MLI,Segou,ML-4,Niono -VBS01664-4651STDY7017645,Mali,MLI,Segou,ML-4,Niono -VBS01669-4651STDY7017646,Mali,MLI,Segou,ML-4,Niono 
-VBS01670-4651STDY7017647,Mali,MLI,Segou,ML-4,Niono -VBS01672-4651STDY7017648,Mali,MLI,Segou,ML-4,Niono -VBS01678-4651STDY7017651,Mali,MLI,Segou,ML-4,Niono -VBS01679-4651STDY7017652,Mali,MLI,Segou,ML-4,Niono -VBS01680-4651STDY7017653,Mali,MLI,Segou,ML-4,Niono -VBS01688-4651STDY7017655,Mali,MLI,Segou,ML-4,Niono -VBS01689-4651STDY7017656,Mali,MLI,Segou,ML-4,Niono -VBS01690-4651STDY7017657,Mali,MLI,Segou,ML-4,Niono -VBS01693-4651STDY7017658,Mali,MLI,Segou,ML-4,Niono -VBS01695-4651STDY7017659,Mali,MLI,Segou,ML-4,Niono -VBS01696-4651STDY7017662,Mali,MLI,Segou,ML-4,Niono -VBS01698-4651STDY7017664,Mali,MLI,Segou,ML-4,Niono -VBS01702-4651STDY7017666,Mali,MLI,Segou,ML-4,Niono -VBS01705-4651STDY7017668,Mali,MLI,Segou,ML-4,Niono -VBS01708-4651STDY7017669,Mali,MLI,Segou,ML-4,Niono -VBS01709-4651STDY7017670,Mali,MLI,Segou,ML-4,Niono -VBS01713-4651STDY7017673,Mali,MLI,Segou,ML-4,Niono -VBS01714-4651STDY7017674,Mali,MLI,Segou,ML-4,Niono -VBS01715-4651STDY7017675,Mali,MLI,Segou,ML-4,Niono -VBS01721-4651STDY7017676,Mali,MLI,Segou,ML-4,Niono -VBS01722-4651STDY7017677,Mali,MLI,Segou,ML-4,Niono -VBS01726-4651STDY7017679,Mali,MLI,Segou,ML-4,Niono -VBS01727-4651STDY7017680,Mali,MLI,Segou,ML-4,Niono -VBS01728-4651STDY7017681,Mali,MLI,Segou,ML-4,Niono -VBS01730-4651STDY7017683,Mali,MLI,Segou,ML-4,Niono -VBS01735-4651STDY7017685,Mali,MLI,Segou,ML-4,Niono -VBS01736-4651STDY7017686,Mali,MLI,Segou,ML-4,Niono -VBS01737-4651STDY7017687,Mali,MLI,Segou,ML-4,Niono -VBS01740-4651STDY7017688,Mali,MLI,Segou,ML-4,Niono -VBS01743-4651STDY7017690,Mali,MLI,Segou,ML-4,Niono -VBS01746-4651STDY7017692,Mali,MLI,Segou,ML-4,Niono -VBS01747-4651STDY7017693,Mali,MLI,Segou,ML-4,Niono -VBS01749-4651STDY7017694,Mali,MLI,Segou,ML-4,Niono -VBS01750-4651STDY7017695,Mali,MLI,Segou,ML-4,Niono -VBS01751-4651STDY7017696,Mali,MLI,Segou,ML-4,Niono -VBS01752-4651STDY7017697,Mali,MLI,Segou,ML-4,Niono -VBS01753-4651STDY7017698,Mali,MLI,Segou,ML-4,Niono -VBS01754-4651STDY7017699,Mali,MLI,Segou,ML-4,Niono 
-VBS01755-4651STDY7017700,Mali,MLI,Segou,ML-4,Niono -VBS01760-4651STDY7017704,Mali,MLI,Segou,ML-4,Niono -VBS01762-4651STDY7017706,Mali,MLI,Segou,ML-4,Niono -VBS01764-4651STDY7017708,Mali,MLI,Segou,ML-4,Niono -VBS01766-4651STDY7017709,Mali,MLI,Segou,ML-4,Niono -VBS01767-4651STDY7017710,Mali,MLI,Segou,ML-4,Niono -VBS01768-4651STDY7017711,Mali,MLI,Segou,ML-4,Niono -VBS01770-4651STDY7017713,Mali,MLI,Segou,ML-4,Niono -VBS01771-4651STDY7017714,Mali,MLI,Segou,ML-4,Niono -VBS01772-4651STDY7017715,Mali,MLI,Segou,ML-4,Niono -VBS01773-4651STDY7017716,Mali,MLI,Segou,ML-4,Niono -VBS01774-4651STDY7017717,Mali,MLI,Segou,ML-4,Niono -VBS01775-4651STDY7017718,Mali,MLI,Segou,ML-4,Niono -VBS01776-4651STDY7017719,Mali,MLI,Segou,ML-4,Niono -VBS01779-4651STDY7017722,Mali,MLI,Segou,ML-4,Niono -VBS01780-4651STDY7017723,Mali,MLI,Segou,ML-4,Niono -VBS01781-4651STDY7017724,Mali,MLI,Segou,ML-4,Niono -VBS01782-4651STDY7017725,Mali,MLI,Segou,ML-4,Niono -VBS01784-4651STDY7017727,Mali,MLI,Segou,ML-4,Niono -VBS01785-4651STDY7017728,Mali,MLI,Segou,ML-4,Niono -VBS01787-4651STDY7017730,Mali,MLI,Segou,ML-4,Niono -VBS01788-4651STDY7017731,Mali,MLI,Segou,ML-4,Niono -VBS01789-4651STDY7017732,Mali,MLI,Segou,ML-4,Niono -VBS01790-4651STDY7017733,Mali,MLI,Segou,ML-4,Niono -VBS01791-4651STDY7017734,Mali,MLI,Segou,ML-4,Niono -VBS01792-4651STDY7017735,Mali,MLI,Segou,ML-4,Niono -VBS01793-4651STDY7017736,Mali,MLI,Segou,ML-4,Niono -VBS01794-4651STDY7017737,Mali,MLI,Segou,ML-4,Niono -VBS01795-4651STDY7017738,Mali,MLI,Segou,ML-4,Niono -VBS01796-4651STDY7017739,Mali,MLI,Segou,ML-4,Niono -VBS01798-4651STDY7017741,Mali,MLI,Segou,ML-4,Niono -VBS01799-4651STDY7017742,Mali,MLI,Segou,ML-4,Niono -VBS01801-4651STDY7017743,Mali,MLI,Segou,ML-4,Niono -VBS01802-4651STDY7017744,Mali,MLI,Segou,ML-4,Niono -VBS01803-4651STDY7017745,Mali,MLI,Segou,ML-4,Niono -VBS01804-4651STDY7017746,Mali,MLI,Segou,ML-4,Niono -VBS01805-4651STDY7017747,Mali,MLI,Segou,ML-4,Niono -VBS01806-4651STDY7017748,Mali,MLI,Segou,ML-4,Niono 
-VBS01807-4651STDY7017749,Mali,MLI,Segou,ML-4,Niono -VBS01810-4651STDY7017752,Mali,MLI,Segou,ML-4,Niono -VBS01812-4651STDY7017754,Mali,MLI,Segou,ML-4,Niono -VBS01813-4651STDY7017755,Mali,MLI,Segou,ML-4,Niono -VBS01816-4651STDY7017760,Mali,MLI,Segou,ML-4,Niono -VBS01817-4651STDY7017761,Mali,MLI,Segou,ML-4,Niono -VBS01818-4651STDY7017762,Mali,MLI,Segou,ML-4,Niono -VBS01820-4651STDY7017764,Mali,MLI,Segou,ML-4,Niono -VBS01821-4651STDY7017765,Mali,MLI,Segou,ML-4,Niono -VBS01823-4651STDY7017767,Mali,MLI,Segou,ML-4,Niono -VBS01824-4651STDY7017768,Mali,MLI,Segou,ML-4,Niono -VBS01825-4651STDY7017769,Mali,MLI,Segou,ML-4,Niono -VBS01827-4651STDY7017771,Mali,MLI,Segou,ML-4,Niono -VBS01828-4651STDY7017772,Mali,MLI,Segou,ML-4,Niono -VBS01829-4651STDY7017773,Mali,MLI,Segou,ML-4,Niono -VBS01830-4651STDY7017774,Mali,MLI,Segou,ML-4,Niono -VBS01831-4651STDY7017775,Mali,MLI,Segou,ML-4,Niono -VBS01832-4651STDY7017776,Mali,MLI,Segou,ML-4,Niono -VBS01833-4651STDY7017777,Mali,MLI,Segou,ML-4,Niono -VBS01834-4651STDY7017778,Mali,MLI,Segou,ML-4,Niono -VBS01836-4651STDY7017780,Mali,MLI,Segou,ML-4,Niono -VBS01838-4651STDY7017781,Mali,MLI,Segou,ML-4,Niono -VBS01839-4651STDY7017782,Mali,MLI,Segou,ML-4,Niono -VBS01840-4651STDY7017783,Mali,MLI,Segou,ML-4,Niono -VBS01841-4651STDY7017784,Mali,MLI,Segou,ML-4,Niono -VBS01842-4651STDY7017785,Mali,MLI,Segou,ML-4,Niono -VBS01843-4651STDY7017786,Mali,MLI,Segou,ML-4,Niono -VBS01844-4651STDY7017787,Mali,MLI,Segou,ML-4,Niono -VBS01846-4651STDY7017788,Mali,MLI,Segou,ML-4,Niono -VBS01847-4651STDY7017789,Mali,MLI,Segou,ML-4,Niono -VBS01848-4651STDY7017790,Mali,MLI,Segou,ML-4,Niono -VBS01850-4651STDY7017792,Mali,MLI,Segou,ML-4,Niono -VBS01851-4651STDY7017793,Mali,MLI,Segou,ML-4,Niono -VBS01852-4651STDY7017794,Mali,MLI,Segou,ML-4,Niono -VBS01853-4651STDY7017795,Mali,MLI,Segou,ML-4,Niono -VBS01858-4651STDY7017796,Mali,MLI,Segou,ML-4,Niono -VBS01860-4651STDY7017797,Mali,MLI,Segou,ML-4,Niono -VBS01861-4651STDY7017798,Mali,MLI,Segou,ML-4,Niono 
-VBS01862-4651STDY7017799,Mali,MLI,Segou,ML-4,Niono -VBS01864-4651STDY7017800,Mali,MLI,Segou,ML-4,Niono -VBS01865-4651STDY7017801,Mali,MLI,Segou,ML-4,Niono -VBS01866-4651STDY7017802,Mali,MLI,Segou,ML-4,Niono -VBS01868-4651STDY7017803,Mali,MLI,Segou,ML-4,Niono -VBS01869-4651STDY7017804,Mali,MLI,Segou,ML-4,Niono -VBS01871-4651STDY7017806,Mali,MLI,Segou,ML-4,Niono -VBS01873-4651STDY7017807,Mali,MLI,Segou,ML-4,Niono -VBS01874-4651STDY7017808,Mali,MLI,Segou,ML-4,Niono -VBS01876-4651STDY7017809,Mali,MLI,Segou,ML-4,Niono -VBS01878-4651STDY7017811,Mali,MLI,Segou,ML-4,Niono -VBS01884-4651STDY7017814,Mali,MLI,Segou,ML-4,Niono -VBS01885-4651STDY7017815,Mali,MLI,Segou,ML-4,Niono -VBS01886-4651STDY7017816,Mali,MLI,Segou,ML-4,Niono -VBS01887-4651STDY7017817,Mali,MLI,Segou,ML-4,Niono -VBS01888-4651STDY7017818,Mali,MLI,Segou,ML-4,Niono -VBS01889-4651STDY7017819,Mali,MLI,Segou,ML-4,Niono -VBS01890-4651STDY7017820,Mali,MLI,Segou,ML-4,Niono -VBS01892-4651STDY7017821,Mali,MLI,Segou,ML-4,Niono -VBS01893-4651STDY7017822,Mali,MLI,Segou,ML-4,Niono -VBS01896-4651STDY7017823,Mali,MLI,Segou,ML-4,Niono -VBS01897-4651STDY7017824,Mali,MLI,Segou,ML-4,Niono -VBS01898-4651STDY7017825,Mali,MLI,Segou,ML-4,Niono -VBS01899-4651STDY7017826,Mali,MLI,Segou,ML-4,Niono -VBS01902-4651STDY7017829,Mali,MLI,Segou,ML-4,Niono -VBS01903-4651STDY7017830,Mali,MLI,Segou,ML-4,Niono -VBS01904-4651STDY7017831,Mali,MLI,Segou,ML-4,Niono -VBS01906-4651STDY7017833,Mali,MLI,Segou,ML-4,Niono -VBS01907-4651STDY7017834,Mali,MLI,Segou,ML-4,Niono -VBS01908-4651STDY7017835,Mali,MLI,Segou,ML-4,Niono -VBS01909-4651STDY7017836,Mali,MLI,Segou,ML-4,Niono -VBS01910-4651STDY7017837,Mali,MLI,Segou,ML-4,Niono -VBS01911-4651STDY7017838,Mali,MLI,Segou,ML-4,Niono -VBS01912-4651STDY7017839,Mali,MLI,Segou,ML-4,Niono -VBS01913-4651STDY7017840,Mali,MLI,Segou,ML-4,Niono -VBS01914-4651STDY7017841,Mali,MLI,Segou,ML-4,Niono -VBS01915-4651STDY7017842,Mali,MLI,Segou,ML-4,Niono -VBS01916-4651STDY7017843,Mali,MLI,Segou,ML-4,Niono 
-VBS01917-4651STDY7017844,Mali,MLI,Segou,ML-4,Niono -VBS01918-4651STDY7017845,Mali,MLI,Segou,ML-4,Niono -VBS01919-4651STDY7017846,Mali,MLI,Segou,ML-4,Niono -VBS01920-4651STDY7017847,Mali,MLI,Segou,ML-4,Niono -VBS01921-4651STDY7017848,Mali,MLI,Segou,ML-4,Niono -VBS01922-4651STDY7017849,Mali,MLI,Segou,ML-4,Niono -VBS01924-4651STDY7017850,Mali,MLI,Segou,ML-4,Niono -VBS01925-4651STDY7017851,Mali,MLI,Segou,ML-4,Niono -VBS01926-4651STDY7017854,Mali,MLI,Segou,ML-4,Niono -VBS01927-4651STDY7017855,Mali,MLI,Segou,ML-4,Niono -VBS01928-4651STDY7017856,Mali,MLI,Segou,ML-4,Niono -VBS01929-4651STDY7017857,Mali,MLI,Segou,ML-4,Niono -VBS01930-4651STDY7017858,Mali,MLI,Segou,ML-4,Niono -VBS01931-4651STDY7017859,Mali,MLI,Segou,ML-4,Niono -VBS01932-4651STDY7017860,Mali,MLI,Segou,ML-4,Niono -VBS01933-4651STDY7017861,Mali,MLI,Segou,ML-4,Niono -VBS01934-4651STDY7017862,Mali,MLI,Segou,ML-4,Niono -VBS01935-4651STDY7017863,Mali,MLI,Segou,ML-4,Niono -VBS01937-4651STDY7017864,Mali,MLI,Segou,ML-4,Niono -VBS01938-4651STDY7017865,Mali,MLI,Segou,ML-4,Niono -VBS01939-4651STDY7017866,Mali,MLI,Segou,ML-4,Niono -VBS01940-4651STDY7017867,Mali,MLI,Segou,ML-4,Niono -VBS01941-4651STDY7017868,Mali,MLI,Segou,ML-4,Niono -VBS01942-4651STDY7017869,Mali,MLI,Segou,ML-4,Niono -VBS01943-4651STDY7017870,Mali,MLI,Segou,ML-4,Niono -VBS01944-4651STDY7017871,Mali,MLI,Segou,ML-4,Niono -VBS01945-4651STDY7017872,Mali,MLI,Segou,ML-4,Niono -VBS01946-4651STDY7017873,Mali,MLI,Segou,ML-4,Niono -VBS01947-4651STDY7017874,Mali,MLI,Segou,ML-4,Niono -VBS01948-4651STDY7017875,Mali,MLI,Segou,ML-4,Niono -VBS01949-4651STDY7017876,Mali,MLI,Segou,ML-4,Niono -VBS01950-4651STDY7017877,Mali,MLI,Segou,ML-4,Niono -VBS01951-4651STDY7017878,Mali,MLI,Segou,ML-4,Niono -VBS01952-4651STDY7017879,Mali,MLI,Segou,ML-4,Niono -VBS01953-4651STDY7017880,Mali,MLI,Segou,ML-4,Niono -VBS01954-4651STDY7017881,Mali,MLI,Segou,ML-4,Niono -VBS01955-4651STDY7017882,Mali,MLI,Segou,ML-4,Niono -VBS01956-4651STDY7017883,Mali,MLI,Segou,ML-4,Niono 
-VBS01957-4651STDY7017884,Mali,MLI,Segou,ML-4,Niono -VBS01958-4651STDY7017885,Mali,MLI,Segou,ML-4,Niono -VBS01959-4651STDY7017886,Mali,MLI,Segou,ML-4,Niono -VBS01960-4651STDY7017887,Mali,MLI,Segou,ML-4,Niono -VBS01961-4651STDY7017888,Mali,MLI,Segou,ML-4,Niono -VBS01962-4651STDY7017889,Mali,MLI,Segou,ML-4,Niono -VBS01963-4651STDY7017890,Mali,MLI,Segou,ML-4,Niono -VBS01964-4651STDY7017891,Mali,MLI,Segou,ML-4,Niono -VBS01965-4651STDY7017892,Mali,MLI,Segou,ML-4,Niono -VBS01966-4651STDY7017893,Mali,MLI,Segou,ML-4,Niono -VBS01968-4651STDY7017895,Mali,MLI,Segou,ML-4,Niono -VBS01970-4651STDY7017896,Mali,MLI,Segou,ML-4,Niono -VBS01971-4651STDY7017897,Mali,MLI,Segou,ML-4,Niono -VBS01972-4651STDY7017898,Mali,MLI,Segou,ML-4,Niono -VBS01973-4651STDY7017899,Mali,MLI,Segou,ML-4,Niono -VBS01974-4651STDY7017900,Mali,MLI,Segou,ML-4,Niono -VBS01975-4651STDY7017901,Mali,MLI,Segou,ML-4,Niono -VBS01976-4651STDY7017902,Mali,MLI,Segou,ML-4,Niono -VBS01978-4651STDY7017903,Mali,MLI,Segou,ML-4,Niono -VBS01979-4651STDY7017904,Mali,MLI,Segou,ML-4,Niono -VBS01980-4651STDY7017905,Mali,MLI,Segou,ML-4,Niono -VBS01981-4651STDY7017906,Mali,MLI,Segou,ML-4,Niono -VBS01982-4651STDY7017907,Mali,MLI,Segou,ML-4,Niono -VBS01983-4651STDY7017908,Mali,MLI,Segou,ML-4,Niono -VBS01984-4651STDY7017909,Mali,MLI,Segou,ML-4,Niono -VBS01985-4651STDY7017910,Mali,MLI,Segou,ML-4,Niono -VBS01986-4651STDY7017911,Mali,MLI,Segou,ML-4,Niono -VBS01987-4651STDY7017912,Mali,MLI,Segou,ML-4,Niono -VBS01990-4651STDY7017915,Mali,MLI,Segou,ML-4,Niono -VBS01991-4651STDY7017916,Mali,MLI,Segou,ML-4,Niono -VBS01992-4651STDY7017917,Mali,MLI,Segou,ML-4,Niono -VBS01994-4651STDY7017918,Mali,MLI,Segou,ML-4,Niono -VBS01996-4651STDY7017920,Mali,MLI,Segou,ML-4,Niono -VBS01997-4651STDY7017921,Mali,MLI,Segou,ML-4,Niono -VBS01998-4651STDY7017922,Mali,MLI,Segou,ML-4,Niono -VBS02000-4651STDY7017924,Mali,MLI,Segou,ML-4,Niono -VBS02001-4651STDY7017925,Mali,MLI,Segou,ML-4,Niono -VBS02002-4651STDY7017926,Mali,MLI,Segou,ML-4,Niono 
-VBS06347-4651STDY7017927,Mali,MLI,Segou,ML-4,Niono diff --git a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.cohorts.csv b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.cohorts.csv index eb54b3456..75588be12 100644 --- a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.cohorts.csv +++ b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.cohorts.csv @@ -1,4 +1,7 @@ sample_id,country_ISO,adm1_name,adm1_ISO,adm2_name,taxon,cohort_admin1_year,cohort_admin1_month,cohort_admin1_quarter,cohort_admin2_year,cohort_admin2_month,cohort_admin2_quarter +VBS00388-4651STDY7017222,MLI,Koulikouro,ML-2,Banamba,arabiensis,ML-2_arab_2012,ML-2_arab_2012_10,ML-2_arab_2012_Q4,ML-2_Banamba_arab_2012,ML-2_Banamba_arab_2012_10,ML-2_Banamba_arab_2012_Q4 +VBS01026-4651STDY7017295,MLI,Koulikouro,ML-2,Banamba,arabiensis,ML-2_arab_2014,ML-2_arab_2014_10,ML-2_arab_2014_Q4,ML-2_Banamba_arab_2014,ML-2_Banamba_arab_2014_10,ML-2_Banamba_arab_2014_Q4 +VBS01053-4651STDY7017308,MLI,Koulikouro,ML-2,Banamba,arabiensis,ML-2_arab_2014,ML-2_arab_2014_10,ML-2_arab_2014_Q4,ML-2_Banamba_arab_2014,ML-2_Banamba_arab_2014_10,ML-2_Banamba_arab_2014_Q4 VBS00256-4651STDY7017184,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_06,ML-2_colu_2012_Q2,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_06,ML-2_Banamba_colu_2012_Q2 VBS00257-4651STDY7017185,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_06,ML-2_colu_2012_Q2,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_06,ML-2_Banamba_colu_2012_Q2 VBS00259-4651STDY7017186,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_06,ML-2_colu_2012_Q2,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_06,ML-2_Banamba_colu_2012_Q2 @@ -31,7 +34,6 @@ 
VBS00370-4651STDY7017216,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML- VBS00371-4651STDY7017217,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_10,ML-2_colu_2012_Q4,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_10,ML-2_Banamba_colu_2012_Q4 VBS00373-4651STDY7017218,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_11,ML-2_colu_2012_Q4,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_11,ML-2_Banamba_colu_2012_Q4 VBS00375-4651STDY7017220,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_11,ML-2_colu_2012_Q4,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_11,ML-2_Banamba_colu_2012_Q4 -VBS00388-4651STDY7017222,MLI,Koulikouro,ML-2,Banamba,arabiensis,ML-2_arab_2012,ML-2_arab_2012_10,ML-2_arab_2012_Q4,ML-2_Banamba_arab_2012,ML-2_Banamba_arab_2012_10,ML-2_Banamba_arab_2012_Q4 VBS00449-4651STDY7017223,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2012,ML-2_colu_2012_09,ML-2_colu_2012_Q3,ML-2_Banamba_colu_2012,ML-2_Banamba_colu_2012_09,ML-2_Banamba_colu_2012_Q3 VBS00912-4651STDY7017225,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_05,ML-2_colu_2014_Q2,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_05,ML-2_Banamba_colu_2014_Q2 VBS00913-4651STDY7017226,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_05,ML-2_colu_2014_Q2,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_05,ML-2_Banamba_colu_2014_Q2 @@ -98,7 +100,6 @@ VBS01014-4651STDY7017290,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML- VBS01015-4651STDY7017291,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_06,ML-2_colu_2014_Q2,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_06,ML-2_Banamba_colu_2014_Q2 VBS01016-4651STDY7017292,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_06,ML-2_colu_2014_Q2,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_06,ML-2_Banamba_colu_2014_Q2 
VBS01018-4651STDY7017293,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 -VBS01026-4651STDY7017295,MLI,Koulikouro,ML-2,Banamba,arabiensis,ML-2_arab_2014,ML-2_arab_2014_10,ML-2_arab_2014_Q4,ML-2_Banamba_arab_2014,ML-2_Banamba_arab_2014_10,ML-2_Banamba_arab_2014_Q4 VBS01029-4651STDY7017296,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 VBS01030-4651STDY7017297,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 VBS01031-4651STDY7017298,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 @@ -110,7 +111,6 @@ VBS01048-4651STDY7017304,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML- VBS01049-4651STDY7017305,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 VBS01050-4651STDY7017306,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 VBS01052-4651STDY7017307,MLI,Koulikouro,ML-2,Banamba,gambiae,ML-2_gamb_2014,ML-2_gamb_2014_10,ML-2_gamb_2014_Q4,ML-2_Banamba_gamb_2014,ML-2_Banamba_gamb_2014_10,ML-2_Banamba_gamb_2014_Q4 -VBS01053-4651STDY7017308,MLI,Koulikouro,ML-2,Banamba,arabiensis,ML-2_arab_2014,ML-2_arab_2014_10,ML-2_arab_2014_Q4,ML-2_Banamba_arab_2014,ML-2_Banamba_arab_2014_10,ML-2_Banamba_arab_2014_Q4 VBS01054-4651STDY7017309,MLI,Koulikouro,ML-2,Banamba,gambiae,ML-2_gamb_2014,ML-2_gamb_2014_10,ML-2_gamb_2014_Q4,ML-2_Banamba_gamb_2014,ML-2_Banamba_gamb_2014_10,ML-2_Banamba_gamb_2014_Q4 
VBS01055-4651STDY7017310,MLI,Koulikouro,ML-2,Banamba,coluzzii,ML-2_colu_2014,ML-2_colu_2014_10,ML-2_colu_2014_Q4,ML-2_Banamba_colu_2014,ML-2_Banamba_colu_2014_10,ML-2_Banamba_colu_2014_Q4 VBS01056-4651STDY7017311,MLI,Koulikouro,ML-2,Banamba,gambiae,ML-2_gamb_2014,ML-2_gamb_2014_10,ML-2_gamb_2014_Q4,ML-2_Banamba_gamb_2014,ML-2_Banamba_gamb_2014_10,ML-2_Banamba_gamb_2014_Q4 diff --git a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.taxa.csv b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.taxa.csv deleted file mode 100644 index d50d9c5b8..000000000 --- a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/cohorts_20230223/1177-VO-ML-LEHMANN-VMF00004/samples.taxa.csv +++ /dev/null @@ -1,648 +0,0 @@ -sample_id,taxon -VBS00256-4651STDY7017184,coluzzii -VBS00257-4651STDY7017185,coluzzii -VBS00259-4651STDY7017186,coluzzii -VBS00262-4651STDY7017187,coluzzii -VBS00277-4651STDY7017189,coluzzii -VBS00288-4651STDY7017191,coluzzii -VBS00289-4651STDY7017192,coluzzii -VBS00293-4651STDY7017193,coluzzii -VBS00309-4651STDY7017194,coluzzii -VBS00331-4651STDY7017196,coluzzii -VBS00343-4651STDY7017197,gambiae -VBS00344-4651STDY7017198,coluzzii -VBS00345-4651STDY7017199,gambiae -VBS00350-4651STDY7017200,gambiae -VBS00351-4651STDY7017201,coluzzii -VBS00352-4651STDY7017202,gambiae -VBS00353-4651STDY7017203,gambiae -VBS00354-4651STDY7017204,coluzzii -VBS00355-4651STDY7017205,coluzzii -VBS00356-4651STDY7017206,coluzzii -VBS00358-4651STDY7017207,coluzzii -VBS00359-4651STDY7017208,coluzzii -VBS00361-4651STDY7017209,coluzzii -VBS00364-4651STDY7017211,coluzzii -VBS00365-4651STDY7017212,coluzzii -VBS00366-4651STDY7017213,coluzzii -VBS00367-4651STDY7017214,coluzzii -VBS00368-4651STDY7017215,coluzzii -VBS00370-4651STDY7017216,coluzzii -VBS00371-4651STDY7017217,coluzzii -VBS00373-4651STDY7017218,coluzzii -VBS00375-4651STDY7017220,coluzzii -VBS00388-4651STDY7017222,arabiensis 
-VBS00449-4651STDY7017223,coluzzii -VBS00912-4651STDY7017225,coluzzii -VBS00913-4651STDY7017226,coluzzii -VBS00916-4651STDY7017227,coluzzii -VBS00917-4651STDY7017228,coluzzii -VBS00918-4651STDY7017229,coluzzii -VBS00919-4651STDY7017230,coluzzii -VBS00920-4651STDY7017231,coluzzii -VBS00922-4651STDY7017232,coluzzii -VBS00925-4651STDY7017233,coluzzii -VBS00926-4651STDY7017234,coluzzii -VBS00927-4651STDY7017235,coluzzii -VBS00928-4651STDY7017236,coluzzii -VBS00929-4651STDY7017237,coluzzii -VBS00931-4651STDY7017238,coluzzii -VBS00932-4651STDY7017239,coluzzii -VBS00933-4651STDY7017240,coluzzii -VBS00935-4651STDY7017241,coluzzii -VBS00937-4651STDY7017243,coluzzii -VBS00943-4651STDY7017245,coluzzii -VBS00944-4651STDY7017246,coluzzii -VBS00945-4651STDY7017247,coluzzii -VBS00949-4651STDY7017248,coluzzii -VBS00950-4651STDY7017249,coluzzii -VBS00951-4651STDY7017250,coluzzii -VBS00952-4651STDY7017251,coluzzii -VBS00953-4651STDY7017252,coluzzii -VBS00957-4651STDY7017253,coluzzii -VBS00958-4651STDY7017254,coluzzii -VBS00960-4651STDY7017255,coluzzii -VBS00961-4651STDY7017256,coluzzii -VBS00962-4651STDY7017257,coluzzii -VBS00964-4651STDY7017258,coluzzii -VBS00969-4651STDY7017259,coluzzii -VBS00970-4651STDY7017260,coluzzii -VBS00971-4651STDY7017261,coluzzii -VBS00972-4651STDY7017262,coluzzii -VBS00974-4651STDY7017263,coluzzii -VBS00975-4651STDY7017264,coluzzii -VBS00976-4651STDY7017265,coluzzii -VBS00977-4651STDY7017266,coluzzii -VBS00982-4651STDY7017267,coluzzii -VBS00983-4651STDY7017268,coluzzii -VBS00985-4651STDY7017269,coluzzii -VBS00986-4651STDY7017270,coluzzii -VBS00989-4651STDY7017271,coluzzii -VBS00991-4651STDY7017272,coluzzii -VBS00992-4651STDY7017273,coluzzii -VBS00994-4651STDY7017274,coluzzii -VBS00997-4651STDY7017275,coluzzii -VBS00999-4651STDY7017278,coluzzii -VBS01000-4651STDY7017279,coluzzii -VBS01001-4651STDY7017280,coluzzii -VBS01002-4651STDY7017281,coluzzii -VBS01004-4651STDY7017282,coluzzii -VBS01005-4651STDY7017283,coluzzii -VBS01006-4651STDY7017284,coluzzii 
-VBS01007-4651STDY7017285,coluzzii -VBS01008-4651STDY7017286,coluzzii -VBS01009-4651STDY7017287,coluzzii -VBS01012-4651STDY7017288,coluzzii -VBS01013-4651STDY7017289,coluzzii -VBS01014-4651STDY7017290,coluzzii -VBS01015-4651STDY7017291,coluzzii -VBS01016-4651STDY7017292,coluzzii -VBS01018-4651STDY7017293,coluzzii -VBS01026-4651STDY7017295,arabiensis -VBS01029-4651STDY7017296,coluzzii -VBS01030-4651STDY7017297,coluzzii -VBS01031-4651STDY7017298,coluzzii -VBS01037-4651STDY7017299,coluzzii -VBS01044-4651STDY7017301,coluzzii -VBS01045-4651STDY7017302,coluzzii -VBS01046-4651STDY7017303,coluzzii -VBS01048-4651STDY7017304,coluzzii -VBS01049-4651STDY7017305,coluzzii -VBS01050-4651STDY7017306,coluzzii -VBS01052-4651STDY7017307,gambiae -VBS01053-4651STDY7017308,arabiensis -VBS01054-4651STDY7017309,gambiae -VBS01055-4651STDY7017310,coluzzii -VBS01056-4651STDY7017311,gambiae -VBS01057-4651STDY7017312,coluzzii -VBS01059-4651STDY7017313,coluzzii -VBS01061-4651STDY7017314,gambiae -VBS01063-4651STDY7017315,coluzzii -VBS01064-4651STDY7017316,coluzzii -VBS01069-4651STDY7017317,arabiensis -VBS01070-4651STDY7017318,gambiae -VBS01071-4651STDY7017319,coluzzii -VBS01072-4651STDY7017320,coluzzii -VBS01073-4651STDY7017321,gambiae -VBS01074-4651STDY7017322,coluzzii -VBS01075-4651STDY7017323,coluzzii -VBS01078-4651STDY7017324,coluzzii -VBS01079-4651STDY7017325,coluzzii -VBS01080-4651STDY7017326,coluzzii -VBS01081-4651STDY7017327,coluzzii -VBS01082-4651STDY7017328,coluzzii -VBS01085-4651STDY7017329,arabiensis -VBS01086-4651STDY7017330,gambiae -VBS01087-4651STDY7017331,coluzzii -VBS01089-4651STDY7017332,coluzzii -VBS01093-4651STDY7017333,coluzzii -VBS01107-4651STDY7017335,arabiensis -VBS01108-4651STDY7017336,coluzzii -VBS01109-4651STDY7017337,coluzzii -VBS01111-4651STDY7017338,gambiae -VBS01112-4651STDY7017339,gambiae -VBS01115-4651STDY7017340,coluzzii -VBS01123-4651STDY7017342,arabiensis -VBS01124-4651STDY7017343,coluzzii -VBS01125-4651STDY7017344,arabiensis -VBS01126-4651STDY7017345,coluzzii 
-VBS01129-4651STDY7017346,arabiensis -VBS01139-4651STDY7017350,gambiae -VBS01140-4651STDY7017351,coluzzii -VBS01141-4651STDY7017352,gambiae -VBS01142-4651STDY7017353,coluzzii -VBS01144-4651STDY7017354,coluzzii -VBS01149-4651STDY7017355,coluzzii -VBS01150-4651STDY7017356,arabiensis -VBS01152-4651STDY7017357,coluzzii -VBS01158-4651STDY7017358,coluzzii -VBS01161-4651STDY7017359,coluzzii -VBS01162-4651STDY7017360,coluzzii -VBS01163-4651STDY7017361,gambiae -VBS01164-4651STDY7017362,coluzzii -VBS01166-4651STDY7017364,arabiensis -VBS01170-4651STDY7017365,arabiensis -VBS01171-4651STDY7017366,coluzzii -VBS01173-4651STDY7017367,arabiensis -VBS01174-4651STDY7017368,coluzzii -VBS01175-4651STDY7017369,coluzzii -VBS01178-4651STDY7017370,coluzzii -VBS01179-4651STDY7017371,arabiensis -VBS01180-4651STDY7017374,arabiensis -VBS01182-4651STDY7017375,coluzzii -VBS01184-4651STDY7017376,gambiae -VBS01187-4651STDY7017377,coluzzii -VBS01189-4651STDY7017379,arabiensis -VBS01191-4651STDY7017380,gambiae -VBS01194-4651STDY7017381,coluzzii -VBS01195-4651STDY7017382,coluzzii -VBS01197-4651STDY7017383,coluzzii -VBS01199-4651STDY7017384,gambiae -VBS01200-4651STDY7017385,coluzzii -VBS01201-4651STDY7017386,arabiensis -VBS01202-4651STDY7017387,coluzzii -VBS01203-4651STDY7017388,gambiae -VBS01204-4651STDY7017389,coluzzii -VBS01205-4651STDY7017390,coluzzii -VBS01207-4651STDY7017392,coluzzii -VBS01208-4651STDY7017393,coluzzii -VBS01209-4651STDY7017394,coluzzii -VBS01210-4651STDY7017395,coluzzii -VBS01211-4651STDY7017396,coluzzii -VBS01212-4651STDY7017397,coluzzii -VBS01213-4651STDY7017398,coluzzii -VBS01216-4651STDY7017400,coluzzii -VBS01218-4651STDY7017401,coluzzii -VBS01219-4651STDY7017402,coluzzii -VBS01222-4651STDY7017404,coluzzii -VBS01223-4651STDY7017405,coluzzii -VBS01224-4651STDY7017406,coluzzii -VBS01226-4651STDY7017407,coluzzii -VBS01227-4651STDY7017408,coluzzii -VBS01228-4651STDY7017409,coluzzii -VBS01229-4651STDY7017410,coluzzii -VBS01230-4651STDY7017411,coluzzii 
-VBS01231-4651STDY7017412,coluzzii -VBS01232-4651STDY7017413,coluzzii -VBS01233-4651STDY7017414,coluzzii -VBS01234-4651STDY7017415,coluzzii -VBS01235-4651STDY7017416,coluzzii -VBS01236-4651STDY7017417,coluzzii -VBS01237-4651STDY7017418,coluzzii -VBS01238-4651STDY7017419,coluzzii -VBS01239-4651STDY7017420,coluzzii -VBS01240-4651STDY7017421,coluzzii -VBS01241-4651STDY7017422,coluzzii -VBS01242-4651STDY7017423,coluzzii -VBS01244-4651STDY7017424,coluzzii -VBS01245-4651STDY7017425,coluzzii -VBS01246-4651STDY7017426,coluzzii -VBS01247-4651STDY7017427,coluzzii -VBS01248-4651STDY7017428,coluzzii -VBS01250-4651STDY7017429,coluzzii -VBS01251-4651STDY7017430,coluzzii -VBS01252-4651STDY7017431,coluzzii -VBS01253-4651STDY7017432,coluzzii -VBS01254-4651STDY7017433,coluzzii -VBS01256-4651STDY7017434,coluzzii -VBS01257-4651STDY7017435,coluzzii -VBS01258-4651STDY7017436,coluzzii -VBS01259-4651STDY7017437,coluzzii -VBS01261-4651STDY7017438,coluzzii -VBS01262-4651STDY7017439,coluzzii -VBS01265-4651STDY7017440,coluzzii -VBS01266-4651STDY7017441,coluzzii -VBS01267-4651STDY7017442,coluzzii -VBS01268-4651STDY7017443,coluzzii -VBS01269-4651STDY7017444,coluzzii -VBS01271-4651STDY7017445,coluzzii -VBS01272-4651STDY7017446,coluzzii -VBS01273-4651STDY7017447,coluzzii -VBS01274-4651STDY7017448,coluzzii -VBS01276-4651STDY7017450,coluzzii -VBS01277-4651STDY7017451,coluzzii -VBS01278-4651STDY7017452,coluzzii -VBS01279-4651STDY7017453,coluzzii -VBS01280-4651STDY7017454,coluzzii -VBS01291-4651STDY7017455,coluzzii -VBS01296-4651STDY7017456,coluzzii -VBS01297-4651STDY7017457,coluzzii -VBS01303-4651STDY7017458,coluzzii -VBS01304-4651STDY7017459,coluzzii -VBS01314-4651STDY7017460,coluzzii -VBS01315-4651STDY7017461,coluzzii -VBS01316-4651STDY7017462,coluzzii -VBS01317-4651STDY7017463,coluzzii -VBS01320-4651STDY7017464,arabiensis -VBS01322-4651STDY7017465,coluzzii -VBS01323-4651STDY7017466,coluzzii -VBS01328-4651STDY7017467,coluzzii -VBS01329-4651STDY7017470,coluzzii -VBS01330-4651STDY7017471,coluzzii 
-VBS01336-4651STDY7017473,coluzzii -VBS01338-4651STDY7017474,coluzzii -VBS01340-4651STDY7017475,coluzzii -VBS01341-4651STDY7017476,coluzzii -VBS01344-4651STDY7017477,coluzzii -VBS01345-4651STDY7017478,coluzzii -VBS01346-4651STDY7017479,coluzzii -VBS01347-4651STDY7017480,coluzzii -VBS01348-4651STDY7017481,coluzzii -VBS01350-4651STDY7017482,coluzzii -VBS01351-4651STDY7017483,coluzzii -VBS01352-4651STDY7017484,coluzzii -VBS01353-4651STDY7017485,coluzzii -VBS01354-4651STDY7017486,coluzzii -VBS01356-4651STDY7017487,coluzzii -VBS01358-4651STDY7017488,coluzzii -VBS01361-4651STDY7017489,coluzzii -VBS01362-4651STDY7017490,coluzzii -VBS01363-4651STDY7017491,coluzzii -VBS01365-4651STDY7017492,coluzzii -VBS01366-4651STDY7017493,coluzzii -VBS01367-4651STDY7017494,coluzzii -VBS01368-4651STDY7017495,coluzzii -VBS01369-4651STDY7017496,coluzzii -VBS01370-4651STDY7017497,coluzzii -VBS01372-4651STDY7017498,coluzzii -VBS01382-4651STDY7017499,coluzzii -VBS01389-4651STDY7017501,coluzzii -VBS01390-4651STDY7017502,coluzzii -VBS01391-4651STDY7017503,coluzzii -VBS01392-4651STDY7017504,coluzzii -VBS01397-4651STDY7017505,coluzzii -VBS01398-4651STDY7017506,coluzzii -VBS01399-4651STDY7017507,coluzzii -VBS01400-4651STDY7017508,coluzzii -VBS01401-4651STDY7017509,arabiensis -VBS01405-4651STDY7017510,coluzzii -VBS01406-4651STDY7017511,coluzzii -VBS01407-4651STDY7017512,coluzzii -VBS01408-4651STDY7017513,coluzzii -VBS01412-4651STDY7017514,gambiae -VBS01413-4651STDY7017515,gambiae -VBS01414-4651STDY7017516,coluzzii -VBS01415-4651STDY7017517,coluzzii -VBS01416-4651STDY7017518,gambiae -VBS01418-4651STDY7017519,coluzzii -VBS01420-4651STDY7017520,coluzzii -VBS01421-4651STDY7017521,coluzzii -VBS01428-4651STDY7017522,gambiae -VBS01430-4651STDY7017523,coluzzii -VBS01432-4651STDY7017525,coluzzii -VBS01433-4651STDY7017526,coluzzii -VBS01434-4651STDY7017527,coluzzii -VBS01435-4651STDY7017528,coluzzii -VBS01436-4651STDY7017529,coluzzii -VBS01438-4651STDY7017530,coluzzii -VBS01439-4651STDY7017531,coluzzii 
-VBS01442-4651STDY7017532,gambiae -VBS01444-4651STDY7017533,coluzzii -VBS01445-4651STDY7017534,coluzzii -VBS01446-4651STDY7017535,coluzzii -VBS01448-4651STDY7017537,coluzzii -VBS01451-4651STDY7017538,coluzzii -VBS01452-4651STDY7017539,coluzzii -VBS01454-4651STDY7017540,coluzzii -VBS01456-4651STDY7017541,coluzzii -VBS01457-4651STDY7017542,coluzzii -VBS01462-4651STDY7017544,coluzzii -VBS01463-4651STDY7017545,coluzzii -VBS01464-4651STDY7017546,coluzzii -VBS01465-4651STDY7017547,coluzzii -VBS01466-4651STDY7017548,arabiensis -VBS01467-4651STDY7017549,coluzzii -VBS01470-4651STDY7017550,coluzzii -VBS01472-4651STDY7017551,coluzzii -VBS01473-4651STDY7017552,coluzzii -VBS01475-4651STDY7017553,gambiae -VBS01476-4651STDY7017554,coluzzii -VBS01477-4651STDY7017555,coluzzii -VBS01478-4651STDY7017556,coluzzii -VBS01484-4651STDY7017557,coluzzii -VBS01492-4651STDY7017558,coluzzii -VBS01503-4651STDY7017559,coluzzii -VBS01506-4651STDY7017560,coluzzii -VBS01508-4651STDY7017561,coluzzii -VBS01509-4651STDY7017562,coluzzii -VBS01510-4651STDY7017563,coluzzii -VBS01511-4651STDY7017566,coluzzii -VBS01513-4651STDY7017567,unassigned -VBS01516-4651STDY7017568,coluzzii -VBS01517-4651STDY7017569,coluzzii -VBS01518-4651STDY7017570,arabiensis -VBS01519-4651STDY7017571,coluzzii -VBS01520-4651STDY7017572,coluzzii -VBS01521-4651STDY7017573,gambiae -VBS01522-4651STDY7017574,coluzzii -VBS01524-4651STDY7017575,coluzzii -VBS01525-4651STDY7017576,coluzzii -VBS01526-4651STDY7017577,coluzzii -VBS01527-4651STDY7017578,gambiae -VBS01528-4651STDY7017579,gambiae -VBS01530-4651STDY7017580,coluzzii -VBS01532-4651STDY7017581,coluzzii -VBS01533-4651STDY7017582,coluzzii -VBS01534-4651STDY7017583,gambiae -VBS01535-4651STDY7017584,gambiae -VBS01536-4651STDY7017585,arabiensis -VBS01537-4651STDY7017586,gambiae -VBS01538-4651STDY7017587,gambiae -VBS01540-4651STDY7017588,gambiae -VBS01541-4651STDY7017589,coluzzii -VBS01542-4651STDY7017590,coluzzii -VBS01543-4651STDY7017591,coluzzii -VBS01544-4651STDY7017592,coluzzii 
-VBS01546-4651STDY7017593,coluzzii -VBS01548-4651STDY7017594,coluzzii -VBS01549-4651STDY7017595,coluzzii -VBS01551-4651STDY7017596,coluzzii -VBS01552-4651STDY7017597,coluzzii -VBS01553-4651STDY7017598,gambiae -VBS01554-4651STDY7017599,coluzzii -VBS01556-4651STDY7017600,gambiae -VBS01557-4651STDY7017601,gambiae -VBS01558-4651STDY7017602,coluzzii -VBS01562-4651STDY7017603,coluzzii -VBS01564-4651STDY7017604,coluzzii -VBS01565-4651STDY7017605,coluzzii -VBS01566-4651STDY7017606,arabiensis -VBS01569-4651STDY7017607,gambiae -VBS01570-4651STDY7017608,coluzzii -VBS01571-4651STDY7017609,gambiae -VBS01572-4651STDY7017610,coluzzii -VBS01574-4651STDY7017611,arabiensis -VBS01575-4651STDY7017612,arabiensis -VBS01576-4651STDY7017613,coluzzii -VBS01578-4651STDY7017614,gambiae -VBS01579-4651STDY7017615,arabiensis -VBS01580-4651STDY7017616,coluzzii -VBS01583-4651STDY7017617,gambiae -VBS01584-4651STDY7017618,coluzzii -VBS01586-4651STDY7017619,coluzzii -VBS01587-4651STDY7017620,coluzzii -VBS01590-4651STDY7017621,arabiensis -VBS01591-4651STDY7017622,coluzzii -VBS01592-4651STDY7017623,arabiensis -VBS01594-4651STDY7017624,arabiensis -VBS01595-4651STDY7017625,gambiae -VBS01596-4651STDY7017626,gambiae -VBS01606-4651STDY7017629,coluzzii -VBS01608-4651STDY7017630,coluzzii -VBS01610-4651STDY7017631,coluzzii -VBS01615-4651STDY7017632,coluzzii -VBS01631-4651STDY7017634,coluzzii -VBS01640-4651STDY7017637,coluzzii -VBS01647-4651STDY7017640,coluzzii -VBS01658-4651STDY7017643,coluzzii -VBS01664-4651STDY7017645,coluzzii -VBS01669-4651STDY7017646,coluzzii -VBS01670-4651STDY7017647,coluzzii -VBS01672-4651STDY7017648,coluzzii -VBS01678-4651STDY7017651,coluzzii -VBS01679-4651STDY7017652,coluzzii -VBS01680-4651STDY7017653,coluzzii -VBS01688-4651STDY7017655,coluzzii -VBS01689-4651STDY7017656,coluzzii -VBS01690-4651STDY7017657,coluzzii -VBS01693-4651STDY7017658,coluzzii -VBS01695-4651STDY7017659,coluzzii -VBS01696-4651STDY7017662,coluzzii -VBS01698-4651STDY7017664,coluzzii -VBS01702-4651STDY7017666,coluzzii 
-VBS01705-4651STDY7017668,coluzzii -VBS01708-4651STDY7017669,coluzzii -VBS01709-4651STDY7017670,coluzzii -VBS01713-4651STDY7017673,coluzzii -VBS01714-4651STDY7017674,coluzzii -VBS01715-4651STDY7017675,coluzzii -VBS01721-4651STDY7017676,coluzzii -VBS01722-4651STDY7017677,coluzzii -VBS01726-4651STDY7017679,coluzzii -VBS01727-4651STDY7017680,coluzzii -VBS01728-4651STDY7017681,coluzzii -VBS01730-4651STDY7017683,coluzzii -VBS01735-4651STDY7017685,coluzzii -VBS01736-4651STDY7017686,coluzzii -VBS01737-4651STDY7017687,coluzzii -VBS01740-4651STDY7017688,coluzzii -VBS01743-4651STDY7017690,coluzzii -VBS01746-4651STDY7017692,coluzzii -VBS01747-4651STDY7017693,coluzzii -VBS01749-4651STDY7017694,coluzzii -VBS01750-4651STDY7017695,coluzzii -VBS01751-4651STDY7017696,coluzzii -VBS01752-4651STDY7017697,coluzzii -VBS01753-4651STDY7017698,coluzzii -VBS01754-4651STDY7017699,coluzzii -VBS01755-4651STDY7017700,coluzzii -VBS01760-4651STDY7017704,coluzzii -VBS01762-4651STDY7017706,coluzzii -VBS01764-4651STDY7017708,coluzzii -VBS01766-4651STDY7017709,coluzzii -VBS01767-4651STDY7017710,coluzzii -VBS01768-4651STDY7017711,coluzzii -VBS01770-4651STDY7017713,coluzzii -VBS01771-4651STDY7017714,coluzzii -VBS01772-4651STDY7017715,coluzzii -VBS01773-4651STDY7017716,coluzzii -VBS01774-4651STDY7017717,coluzzii -VBS01775-4651STDY7017718,coluzzii -VBS01776-4651STDY7017719,coluzzii -VBS01779-4651STDY7017722,coluzzii -VBS01780-4651STDY7017723,coluzzii -VBS01781-4651STDY7017724,coluzzii -VBS01782-4651STDY7017725,coluzzii -VBS01784-4651STDY7017727,coluzzii -VBS01785-4651STDY7017728,coluzzii -VBS01787-4651STDY7017730,coluzzii -VBS01788-4651STDY7017731,coluzzii -VBS01789-4651STDY7017732,coluzzii -VBS01790-4651STDY7017733,coluzzii -VBS01791-4651STDY7017734,coluzzii -VBS01792-4651STDY7017735,coluzzii -VBS01793-4651STDY7017736,coluzzii -VBS01794-4651STDY7017737,coluzzii -VBS01795-4651STDY7017738,coluzzii -VBS01796-4651STDY7017739,coluzzii -VBS01798-4651STDY7017741,coluzzii -VBS01799-4651STDY7017742,coluzzii 
-VBS01801-4651STDY7017743,coluzzii -VBS01802-4651STDY7017744,coluzzii -VBS01803-4651STDY7017745,coluzzii -VBS01804-4651STDY7017746,coluzzii -VBS01805-4651STDY7017747,coluzzii -VBS01806-4651STDY7017748,coluzzii -VBS01807-4651STDY7017749,coluzzii -VBS01810-4651STDY7017752,coluzzii -VBS01812-4651STDY7017754,coluzzii -VBS01813-4651STDY7017755,coluzzii -VBS01816-4651STDY7017760,coluzzii -VBS01817-4651STDY7017761,coluzzii -VBS01818-4651STDY7017762,coluzzii -VBS01820-4651STDY7017764,coluzzii -VBS01821-4651STDY7017765,coluzzii -VBS01823-4651STDY7017767,coluzzii -VBS01824-4651STDY7017768,coluzzii -VBS01825-4651STDY7017769,coluzzii -VBS01827-4651STDY7017771,coluzzii -VBS01828-4651STDY7017772,coluzzii -VBS01829-4651STDY7017773,coluzzii -VBS01830-4651STDY7017774,coluzzii -VBS01831-4651STDY7017775,coluzzii -VBS01832-4651STDY7017776,coluzzii -VBS01833-4651STDY7017777,coluzzii -VBS01834-4651STDY7017778,coluzzii -VBS01836-4651STDY7017780,coluzzii -VBS01838-4651STDY7017781,coluzzii -VBS01839-4651STDY7017782,coluzzii -VBS01840-4651STDY7017783,coluzzii -VBS01841-4651STDY7017784,coluzzii -VBS01842-4651STDY7017785,coluzzii -VBS01843-4651STDY7017786,coluzzii -VBS01844-4651STDY7017787,coluzzii -VBS01846-4651STDY7017788,coluzzii -VBS01847-4651STDY7017789,coluzzii -VBS01848-4651STDY7017790,coluzzii -VBS01850-4651STDY7017792,coluzzii -VBS01851-4651STDY7017793,coluzzii -VBS01852-4651STDY7017794,coluzzii -VBS01853-4651STDY7017795,coluzzii -VBS01858-4651STDY7017796,coluzzii -VBS01860-4651STDY7017797,coluzzii -VBS01861-4651STDY7017798,coluzzii -VBS01862-4651STDY7017799,coluzzii -VBS01864-4651STDY7017800,coluzzii -VBS01865-4651STDY7017801,coluzzii -VBS01866-4651STDY7017802,coluzzii -VBS01868-4651STDY7017803,coluzzii -VBS01869-4651STDY7017804,coluzzii -VBS01871-4651STDY7017806,coluzzii -VBS01873-4651STDY7017807,coluzzii -VBS01874-4651STDY7017808,coluzzii -VBS01876-4651STDY7017809,coluzzii -VBS01878-4651STDY7017811,coluzzii -VBS01884-4651STDY7017814,coluzzii -VBS01885-4651STDY7017815,coluzzii 
-VBS01886-4651STDY7017816,coluzzii -VBS01887-4651STDY7017817,coluzzii -VBS01888-4651STDY7017818,coluzzii -VBS01889-4651STDY7017819,coluzzii -VBS01890-4651STDY7017820,coluzzii -VBS01892-4651STDY7017821,coluzzii -VBS01893-4651STDY7017822,coluzzii -VBS01896-4651STDY7017823,coluzzii -VBS01897-4651STDY7017824,coluzzii -VBS01898-4651STDY7017825,coluzzii -VBS01899-4651STDY7017826,coluzzii -VBS01902-4651STDY7017829,coluzzii -VBS01903-4651STDY7017830,coluzzii -VBS01904-4651STDY7017831,coluzzii -VBS01906-4651STDY7017833,coluzzii -VBS01907-4651STDY7017834,coluzzii -VBS01908-4651STDY7017835,coluzzii -VBS01909-4651STDY7017836,coluzzii -VBS01910-4651STDY7017837,coluzzii -VBS01911-4651STDY7017838,coluzzii -VBS01912-4651STDY7017839,coluzzii -VBS01913-4651STDY7017840,coluzzii -VBS01914-4651STDY7017841,coluzzii -VBS01915-4651STDY7017842,coluzzii -VBS01916-4651STDY7017843,coluzzii -VBS01917-4651STDY7017844,coluzzii -VBS01918-4651STDY7017845,coluzzii -VBS01919-4651STDY7017846,coluzzii -VBS01920-4651STDY7017847,coluzzii -VBS01921-4651STDY7017848,coluzzii -VBS01922-4651STDY7017849,coluzzii -VBS01924-4651STDY7017850,coluzzii -VBS01925-4651STDY7017851,coluzzii -VBS01926-4651STDY7017854,coluzzii -VBS01927-4651STDY7017855,coluzzii -VBS01928-4651STDY7017856,coluzzii -VBS01929-4651STDY7017857,coluzzii -VBS01930-4651STDY7017858,coluzzii -VBS01931-4651STDY7017859,coluzzii -VBS01932-4651STDY7017860,coluzzii -VBS01933-4651STDY7017861,coluzzii -VBS01934-4651STDY7017862,coluzzii -VBS01935-4651STDY7017863,coluzzii -VBS01937-4651STDY7017864,coluzzii -VBS01938-4651STDY7017865,coluzzii -VBS01939-4651STDY7017866,coluzzii -VBS01940-4651STDY7017867,coluzzii -VBS01941-4651STDY7017868,coluzzii -VBS01942-4651STDY7017869,coluzzii -VBS01943-4651STDY7017870,coluzzii -VBS01944-4651STDY7017871,coluzzii -VBS01945-4651STDY7017872,coluzzii -VBS01946-4651STDY7017873,coluzzii -VBS01947-4651STDY7017874,coluzzii -VBS01948-4651STDY7017875,coluzzii -VBS01949-4651STDY7017876,coluzzii -VBS01950-4651STDY7017877,coluzzii 
-VBS01951-4651STDY7017878,coluzzii -VBS01952-4651STDY7017879,coluzzii -VBS01953-4651STDY7017880,coluzzii -VBS01954-4651STDY7017881,coluzzii -VBS01955-4651STDY7017882,coluzzii -VBS01956-4651STDY7017883,coluzzii -VBS01957-4651STDY7017884,coluzzii -VBS01958-4651STDY7017885,coluzzii -VBS01959-4651STDY7017886,coluzzii -VBS01960-4651STDY7017887,coluzzii -VBS01961-4651STDY7017888,coluzzii -VBS01962-4651STDY7017889,coluzzii -VBS01963-4651STDY7017890,coluzzii -VBS01964-4651STDY7017891,coluzzii -VBS01965-4651STDY7017892,coluzzii -VBS01966-4651STDY7017893,coluzzii -VBS01968-4651STDY7017895,coluzzii -VBS01970-4651STDY7017896,coluzzii -VBS01971-4651STDY7017897,coluzzii -VBS01972-4651STDY7017898,coluzzii -VBS01973-4651STDY7017899,coluzzii -VBS01974-4651STDY7017900,coluzzii -VBS01975-4651STDY7017901,coluzzii -VBS01976-4651STDY7017902,coluzzii -VBS01978-4651STDY7017903,coluzzii -VBS01979-4651STDY7017904,coluzzii -VBS01980-4651STDY7017905,coluzzii -VBS01981-4651STDY7017906,coluzzii -VBS01982-4651STDY7017907,coluzzii -VBS01983-4651STDY7017908,coluzzii -VBS01984-4651STDY7017909,coluzzii -VBS01985-4651STDY7017910,coluzzii -VBS01986-4651STDY7017911,coluzzii -VBS01987-4651STDY7017912,coluzzii -VBS01990-4651STDY7017915,coluzzii -VBS01991-4651STDY7017916,coluzzii -VBS01992-4651STDY7017917,coluzzii -VBS01994-4651STDY7017918,coluzzii -VBS01996-4651STDY7017920,coluzzii -VBS01997-4651STDY7017921,coluzzii -VBS01998-4651STDY7017922,coluzzii -VBS02000-4651STDY7017924,coluzzii -VBS02001-4651STDY7017925,coluzzii -VBS02002-4651STDY7017926,coluzzii -VBS06347-4651STDY7017927,coluzzii diff --git a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/general/1177-VO-ML-LEHMANN-VMF00004/samples.meta.csv b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/general/1177-VO-ML-LEHMANN-VMF00004/samples.meta.csv index ad8c8848c..3cd99ae3d 100644 --- a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/general/1177-VO-ML-LEHMANN-VMF00004/samples.meta.csv +++ 
b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/general/1177-VO-ML-LEHMANN-VMF00004/samples.meta.csv @@ -1,4 +1,7 @@ sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call +VBS00388-4651STDY7017222,GP229,Tovi Lehmann,Mali,Dallowere,2012,10,13.616,-7.037,F +VBS01026-4651STDY7017295,GP867,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F +VBS01053-4651STDY7017308,GP894,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS00256-4651STDY7017184,GP97,Tovi Lehmann,Mali,Dallowere,2012,6,13.616,-7.037,F VBS00257-4651STDY7017185,GP98,Tovi Lehmann,Mali,Dallowere,2012,6,13.616,-7.037,F VBS00259-4651STDY7017186,GP100,Tovi Lehmann,Mali,Dallowere,2012,6,13.616,-7.037,F @@ -31,7 +34,6 @@ VBS00370-4651STDY7017216,GP211,Tovi Lehmann,Mali,Dallowere,2012,10,13.616,-7.037 VBS00371-4651STDY7017217,GP212,Tovi Lehmann,Mali,Dallowere,2012,10,13.616,-7.037,F VBS00373-4651STDY7017218,GP214,Tovi Lehmann,Mali,Dallowere,2012,11,13.616,-7.037,F VBS00375-4651STDY7017220,GP216,Tovi Lehmann,Mali,Dallowere,2012,11,13.616,-7.037,F -VBS00388-4651STDY7017222,GP229,Tovi Lehmann,Mali,Dallowere,2012,10,13.616,-7.037,F VBS00449-4651STDY7017223,GP290,Tovi Lehmann,Mali,Dallowere,2012,9,13.616,-7.037,F VBS00912-4651STDY7017225,GP753,Tovi Lehmann,Mali,Dallowere,2014,5,13.616,-7.037,F VBS00913-4651STDY7017226,GP754,Tovi Lehmann,Mali,Dallowere,2014,5,13.616,-7.037,F @@ -98,7 +100,6 @@ VBS01014-4651STDY7017290,GP855,Tovi Lehmann,Mali,Dallowere,2014,6,13.616,-7.037, VBS01015-4651STDY7017291,GP856,Tovi Lehmann,Mali,Dallowere,2014,6,13.616,-7.037,F VBS01016-4651STDY7017292,GP857,Tovi Lehmann,Mali,Dallowere,2014,6,13.616,-7.037,F VBS01018-4651STDY7017293,GP859,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F -VBS01026-4651STDY7017295,GP867,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01029-4651STDY7017296,GP870,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01030-4651STDY7017297,GP871,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F 
VBS01031-4651STDY7017298,GP872,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F @@ -110,7 +111,6 @@ VBS01048-4651STDY7017304,GP889,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037 VBS01049-4651STDY7017305,GP890,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01050-4651STDY7017306,GP891,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01052-4651STDY7017307,GP893,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F -VBS01053-4651STDY7017308,GP894,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01054-4651STDY7017309,GP895,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01055-4651STDY7017310,GP896,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F VBS01056-4651STDY7017311,GP897,Tovi Lehmann,Mali,Dallowere,2014,10,13.616,-7.037,F diff --git a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/species_calls_aim_20220528/1177-VO-ML-LEHMANN-VMF00004/samples.species_aim.csv b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/species_calls_aim_20220528/1177-VO-ML-LEHMANN-VMF00004/samples.species_aim.csv index 567036d41..b548d75fb 100644 --- a/tests/anoph/fixture/vo_agam_release/v3.1/metadata/species_calls_aim_20220528/1177-VO-ML-LEHMANN-VMF00004/samples.species_aim.csv +++ b/tests/anoph/fixture/vo_agam_release/v3.1/metadata/species_calls_aim_20220528/1177-VO-ML-LEHMANN-VMF00004/samples.species_aim.csv @@ -1,4 +1,7 @@ sample_id,aim_species_fraction_arab,aim_species_fraction_colu,aim_species_fraction_colu_no2L,aim_species_gambcolu_arabiensis,aim_species_gambiae_coluzzii,aim_species +VBS00388-4651STDY7017222,0.976,0.474,0.472,arabiensis,NA,arabiensis +VBS01026-4651STDY7017295,0.977,0.465,0.469,arabiensis,NA,arabiensis +VBS01053-4651STDY7017308,0.983,0.456,0.453,arabiensis,NA,arabiensis VBS00256-4651STDY7017184,0.002,0.858,0.973,gambcolu,coluzzii,coluzzii VBS00257-4651STDY7017185,0.002,0.977,0.982,gambcolu,coluzzii,coluzzii VBS00259-4651STDY7017186,0.001,0.917,0.974,gambcolu,coluzzii,coluzzii @@ -31,7 +34,6 @@ 
VBS00370-4651STDY7017216,0.001,0.858,0.974,gambcolu,coluzzii,coluzzii VBS00371-4651STDY7017217,0.002,0.880,0.977,gambcolu,coluzzii,coluzzii VBS00373-4651STDY7017218,0.001,0.869,0.982,gambcolu,coluzzii,coluzzii VBS00375-4651STDY7017220,0.002,0.861,0.978,gambcolu,coluzzii,coluzzii -VBS00388-4651STDY7017222,0.976,0.474,0.472,arabiensis,NA,arabiensis VBS00449-4651STDY7017223,0.005,0.923,0.983,gambcolu,coluzzii,coluzzii VBS00912-4651STDY7017225,0.001,0.858,0.976,gambcolu,coluzzii,coluzzii VBS00913-4651STDY7017226,0.001,0.871,0.989,gambcolu,coluzzii,coluzzii @@ -98,7 +100,6 @@ VBS01014-4651STDY7017290,0.002,0.915,0.980,gambcolu,coluzzii,coluzzii VBS01015-4651STDY7017291,0.003,0.864,0.983,gambcolu,coluzzii,coluzzii VBS01016-4651STDY7017292,0.003,0.981,0.988,gambcolu,coluzzii,coluzzii VBS01018-4651STDY7017293,0.003,0.861,0.978,gambcolu,coluzzii,coluzzii -VBS01026-4651STDY7017295,0.977,0.465,0.469,arabiensis,NA,arabiensis VBS01029-4651STDY7017296,0.001,0.929,0.988,gambcolu,coluzzii,coluzzii VBS01030-4651STDY7017297,0.003,0.919,0.976,gambcolu,coluzzii,coluzzii VBS01031-4651STDY7017298,0.001,0.859,0.975,gambcolu,coluzzii,coluzzii @@ -110,7 +111,6 @@ VBS01048-4651STDY7017304,0.002,0.864,0.983,gambcolu,coluzzii,coluzzii VBS01049-4651STDY7017305,0.002,0.856,0.974,gambcolu,coluzzii,coluzzii VBS01050-4651STDY7017306,0.001,0.919,0.976,gambcolu,coluzzii,coluzzii VBS01052-4651STDY7017307,0.004,0.018,0.020,gambcolu,gambiae,gambiae -VBS01053-4651STDY7017308,0.983,0.456,0.453,arabiensis,NA,arabiensis VBS01054-4651STDY7017309,0.003,0.013,0.015,gambcolu,gambiae,gambiae VBS01055-4651STDY7017310,0.002,0.927,0.985,gambcolu,coluzzii,coluzzii VBS01056-4651STDY7017311,0.002,0.018,0.020,gambcolu,gambiae,gambiae diff --git a/tests/anoph/test_hap_data.py b/tests/anoph/test_hap_data.py new file mode 100644 index 000000000..c80475a49 --- /dev/null +++ b/tests/anoph/test_hap_data.py @@ -0,0 +1,487 @@ +import random + +import dask.array as da +import numpy as np +import pytest +import xarray as xr 
+import zarr +from pytest_cases import parametrize_with_cases + +from malariagen_data import af1 as _af1 +from malariagen_data import ag3 as _ag3 +from malariagen_data.anoph.hap_data import AnophelesHapData + + +@pytest.fixture +def ag3_sim_api(ag3_sim_fixture): + return AnophelesHapData( + url=ag3_sim_fixture.url, + config_path=_ag3.CONFIG_PATH, + gcs_url=_ag3.GCS_URL, + major_version_number=_ag3.MAJOR_VERSION_NUMBER, + major_version_path=_ag3.MAJOR_VERSION_PATH, + pre=True, + aim_metadata_dtype={ + "aim_species_fraction_arab": "float64", + "aim_species_fraction_colu": "float64", + "aim_species_fraction_colu_no2l": "float64", + "aim_species_gambcolu_arabiensis": object, + "aim_species_gambiae_coluzzii": object, + "aim_species": object, + }, + gff_gene_type="gene", + gff_default_attributes=("ID", "Parent", "Name", "description"), + results_cache=ag3_sim_fixture.results_cache_path.as_posix(), + default_phasing_analysis="gamb_colu_arab", + ) + + +@pytest.fixture +def af1_sim_api(af1_sim_fixture): + return AnophelesHapData( + url=af1_sim_fixture.url, + config_path=_af1.CONFIG_PATH, + gcs_url=_af1.GCS_URL, + major_version_number=_af1.MAJOR_VERSION_NUMBER, + major_version_path=_af1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_default_attributes=("ID", "Parent", "Note", "description"), + results_cache=af1_sim_fixture.results_cache_path.as_posix(), + default_phasing_analysis="funestus", + ) + + +# N.B., here we use pytest_cases to parametrize tests. Each +# function whose name begins with "case_" defines a set of +# inputs to the test functions. See the documentation for +# pytest_cases for more information, e.g.: +# +# https://smarie.github.io/python-pytest-cases/#basic-usage +# +# We use this approach here because we want to use fixtures +# as test parameters, which is otherwise hard to do with +# pytest alone. 
+ + +def case_ag3_sim(ag3_sim_fixture, ag3_sim_api): + return ag3_sim_fixture, ag3_sim_api + + +def case_af1_sim(af1_sim_fixture, af1_sim_api): + return af1_sim_fixture, af1_sim_api + + +def _check_haplotype_sites(root, api: AnophelesHapData): + assert isinstance(root, zarr.hierarchy.Group) + for contig in api.contigs: + assert contig in root + contig_grp = root[contig] + assert "variants" in contig_grp + variants = contig_grp["variants"] + assert "POS" in variants + assert "REF" in variants + assert "ALT" in variants + + +@parametrize_with_cases("fixture,api", cases=".") +def test_open_haplotype_sites(fixture, api: AnophelesHapData): + # Test default analysis. + root = api.open_haplotype_sites() + _check_haplotype_sites(root, api) + + # Test specific analyses. + for analysis in api.phasing_analysis_ids: + root = api.open_haplotype_sites(analysis=analysis) + _check_haplotype_sites(root, api) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_open_haplotypes(fixture, api: AnophelesHapData): + for rec in api.sample_sets().itertuples(): + sample_set = rec.sample_set + for analysis in api.phasing_analysis_ids: + # How many samples do we expect? + expected_samples = fixture.phasing_samples[sample_set, analysis] + + # How many sites do we expect? + expected_n_sites = fixture.n_hap_sites[analysis] + + # Access haplotype data. + root = api.open_haplotypes(sample_set=sample_set, analysis=analysis) + if len(expected_samples) == 0: + assert root is None + else: + assert isinstance(root, zarr.hierarchy.Group) + + # Check samples array. + assert "samples" in root + samples = root["samples"][:] + assert samples.ndim == 1 + assert samples.dtype.kind == "O" + assert samples.shape[0] == len(expected_samples) + + # Check calldata arrays. 
+ for contig in api.contigs: + assert contig in root + contig_grp = root[contig] + + assert "calldata" in contig_grp + calldata = contig_grp["calldata"] + assert "GT" in calldata + gt = calldata["GT"] + assert gt.ndim == 3 + assert gt.dtype == "i1" + assert gt.shape[0] == expected_n_sites[contig] + assert gt.shape[1] == len(expected_samples) + assert gt.shape[2] == 2 + + +def check_haplotypes( + fixture, + api: AnophelesHapData, + sample_sets, + region, + analysis, + sample_query=None, + cohort_size=None, + min_cohort_size=None, + max_cohort_size=None, +): + # Set up test, figure out how many samples phased in the analysis. + sample_sets_prepped = api._prep_sample_sets_param(sample_sets=sample_sets) + samples_phased = np.concatenate( + [ + fixture.phasing_samples[sample_set, analysis] + for sample_set in sample_sets_prepped + ] + ) + n_samples_phased = len(samples_phased) + + # Check if no samples phased in the analysis. + if n_samples_phased == 0: + with pytest.raises(ValueError): + ds = api.haplotypes( + region=region, + sample_sets=sample_sets, + analysis=analysis, + sample_query=sample_query, + ) + return + + # Handle sample query. + if sample_query is not None: + df_samples = api.sample_metadata(sample_sets=sample_sets) + df_samples = df_samples.set_index("sample_id") + df_samples_phased = df_samples.loc[samples_phased].reset_index() + df_samples_queried = df_samples_phased.query(sample_query) + samples_selected = df_samples_queried["sample_id"].values + else: + samples_selected = samples_phased + n_samples_selected = len(samples_selected) + + # Check if no samples matching selection. + if n_samples_selected == 0: + with pytest.raises(ValueError): + ds = api.haplotypes( + region=region, + sample_sets=sample_sets, + analysis=analysis, + sample_query=sample_query, + ) + return + + # Check if not enough samples for requested cohort size. 
+ if cohort_size and n_samples_selected < cohort_size: + with pytest.raises(ValueError): + ds = api.haplotypes( + region=region, + sample_sets=sample_sets, + analysis=analysis, + sample_query=sample_query, + cohort_size=cohort_size, + ) + return + + # Check if not enough samples for requested minimum cohort size. + if min_cohort_size and n_samples_selected < min_cohort_size: + with pytest.raises(ValueError): + ds = api.haplotypes( + region=region, + sample_sets=sample_sets, + analysis=analysis, + sample_query=sample_query, + min_cohort_size=min_cohort_size, + ) + return + + # Call function to be tested. + ds = api.haplotypes( + region=region, + sample_sets=sample_sets, + analysis=analysis, + sample_query=sample_query, + cohort_size=cohort_size, + min_cohort_size=min_cohort_size, + max_cohort_size=max_cohort_size, + ) + + # Check return type. + assert isinstance(ds, xr.Dataset) + + # Check variables. + expected_data_vars = { + "variant_allele", + "call_genotype", + } + assert set(ds.data_vars) == expected_data_vars + expected_coords = { + "variant_contig", + "variant_position", + "sample_id", + } + assert set(ds.coords) == expected_coords + + # Check dimensions. + assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} + + # Check samples. + samples = ds["sample_id"].values + if cohort_size or max_cohort_size: + # N.B., there may have been some down-sampling. + selected_samples_set = set(samples_selected) + assert all([s in selected_samples_set for s in samples]) + else: + assert set(samples) == set(samples_selected) + + # Check dim lengths. + if cohort_size: + n_samples_expected = cohort_size + elif max_cohort_size: + n_samples_expected = min(n_samples_selected, max_cohort_size) + else: + n_samples_expected = n_samples_selected + n_samples = ds.dims["samples"] + assert n_samples == n_samples_expected + if min_cohort_size: + assert n_samples >= min_cohort_size + assert ds.dims["ploidy"] == 2 + assert ds.dims["alleles"] == 2 + + # Check shapes. 
+ for f in expected_coords | expected_data_vars: + x = ds[f] + assert isinstance(x, xr.DataArray) + assert isinstance(x.data, da.Array) + + if f == "variant_allele": + assert x.ndim == 2 + assert x.shape[1] == 2 + assert x.dims == ("variants", "alleles") + elif f.startswith("variant_"): + assert x.ndim == 1 + assert x.dims == ("variants",) + elif f == "call_genotype": + assert x.ndim == 3 + assert x.dims == ("variants", "samples", "ploidy") + assert x.shape[1] == n_samples_expected + assert x.shape[2] == 2 + + # Check attributes. + assert "contigs" in ds.attrs + assert ds.attrs["contigs"] == api.contigs + + # Check can set up computations. + d1 = ds["variant_position"] > 10_000 + assert isinstance(d1, xr.DataArray) + d2 = ds["call_genotype"].sum(axis=(1, 2)) + assert isinstance(d2, xr.DataArray) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_haplotypes_with_sample_sets_param(fixture, api: AnophelesHapData): + # Fixed parameters. + region = fixture.random_region_str() + analysis = api.phasing_analysis_ids[0] + + # Parametrize sample_sets. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + all_releases = api.releases + parametrize_sample_sets = [ + None, + random.choice(all_sample_sets), + random.sample(all_sample_sets, 2), + random.choice(all_releases), + ] + + # Run tests. + for sample_sets in parametrize_sample_sets: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_haplotypes_with_region_param(fixture, api: AnophelesHapData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + analysis = api.phasing_analysis_ids[0] + + # Parametrize region. 
+ contig = fixture.random_contig() + df_gff = api.genome_features(attributes=["ID"]) + parametrize_region = [ + contig, + fixture.random_region_str(), + [fixture.random_region_str(), fixture.random_region_str()], + random.choice(df_gff["ID"].dropna().to_list()), + ] + + # Run tests. + for region in parametrize_region: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_haplotypes_with_analysis_param(fixture, api: AnophelesHapData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + + # Parametrize analysis. + parametrize_analysis = api.phasing_analysis_ids + + # Run tests. + for analysis in parametrize_analysis: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + ) + + +def test_haplotypes_with_sample_query_param( + ag3_sim_fixture, ag3_sim_api: AnophelesHapData +): + api = ag3_sim_api + fixture = ag3_sim_fixture + + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + analysis = api.phasing_analysis_ids[0] + + # Parametrize analysis. + parametrize_query = ["sex_call == 'F'", "taxon == 'coluzzii'", "taxon == 'robot'"] + + # Run tests. + for sample_query in parametrize_query: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + sample_query=sample_query, + ) + + +def test_haplotypes_with_cohort_size_param( + ag3_sim_fixture, ag3_sim_api: AnophelesHapData +): + api = ag3_sim_api + fixture = ag3_sim_fixture + + # Fixed parameters. 
+ all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + analysis = api.phasing_analysis_ids[0] + + # Parametrize over cohort_size. + parametrize_cohort_size = [random.randint(1, 10), random.randint(10, 50), 1_000] + for cohort_size in parametrize_cohort_size: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + sample_query=None, + cohort_size=cohort_size, + ) + + +def test_haplotypes_with_min_cohort_size_param( + ag3_sim_fixture, ag3_sim_api: AnophelesHapData +): + api = ag3_sim_api + fixture = ag3_sim_fixture + + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + analysis = api.phasing_analysis_ids[0] + + # Parametrize over min_cohort_size. + parametrize_min_cohort_size = [ + random.randint(1, 10), + random.randint(10, 50), + 1_000, + ] + for min_cohort_size in parametrize_min_cohort_size: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + sample_query=None, + min_cohort_size=min_cohort_size, + ) + + +def test_haplotypes_with_max_cohort_size_param( + ag3_sim_fixture, ag3_sim_api: AnophelesHapData +): + api = ag3_sim_api + fixture = ag3_sim_fixture + + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + analysis = api.phasing_analysis_ids[0] + + # Parametrize over max_cohort_size. 
+ parametrize_max_cohort_size = [ + random.randint(1, 10), + random.randint(10, 50), + 1_000, + ] + for max_cohort_size in parametrize_max_cohort_size: + check_haplotypes( + fixture=fixture, + api=api, + sample_sets=sample_sets, + region=region, + analysis=analysis, + sample_query=None, + max_cohort_size=max_cohort_size, + ) diff --git a/tests/anoph/test_sample_metadata.py b/tests/anoph/test_sample_metadata.py index b61f76e01..3c29d31f5 100644 --- a/tests/anoph/test_sample_metadata.py +++ b/tests/anoph/test_sample_metadata.py @@ -45,6 +45,28 @@ def af1_sim_api(af1_sim_fixture): ) +@pytest.fixture +def missing_metadata_api(fixture_dir): + # In this fixture, one of the sample sets (AG1000G-BF-A) has missing files + # for both AIM and cohorts metadata. + return AnophelesSampleMetadata( + url=(fixture_dir / "missing_metadata").as_uri(), + config_path="config.json", + gcs_url=None, + major_version_number=3, + major_version_path="v3", + pre=False, + aim_metadata_dtype={ + "aim_species_fraction_arab": "float64", + "aim_species_fraction_colu": "float64", + "aim_species_fraction_colu_no2l": "float64", + "aim_species_gambcolu_arabiensis": object, + "aim_species_gambiae_coluzzii": object, + "aim_species": object, + }, + ) + + def case_ag3_sim(ag3_sim_fixture, ag3_sim_api): return ag3_sim_fixture, ag3_sim_api @@ -212,6 +234,28 @@ def test_aim_metadata_with_release(ag3_sim_api): assert len(df) == expected_len +def test_aim_metadata_with_missing_file( + missing_metadata_api: AnophelesSampleMetadata, +): + # In this test, one of the sample sets (AG1000G-BF-A) has a missing file. + # We expect this to be filled with empty values. + api = missing_metadata_api + + # Set up test. + df_sample_sets = api.sample_sets().set_index("sample_set") + sample_count = df_sample_sets["sample_count"] + all_sample_sets = df_sample_sets.index.to_list() + + for sample_set in all_sample_sets: + # Call function to be tested. + df = api.aim_metadata(sample_sets=sample_set) + + # Check output. 
+ validate_aim_metadata(df) + expected_len = sample_count.loc[sample_set] + assert len(df) == expected_len + + def cohorts_metadata_expected_columns(has_cohorts_by_quarter): if has_cohorts_by_quarter: return { @@ -302,6 +346,28 @@ def test_cohorts_metadata_with_release(fixture, api: AnophelesSampleMetadata): assert len(df) == expected_len +def test_cohorts_metadata_with_missing_file( + missing_metadata_api: AnophelesSampleMetadata, +): + # In this test, one of the sample sets (AG1000G-BF-A) has a missing file. + # We expect this to be filled with empty values. + api = missing_metadata_api + + # Set up test. + df_sample_sets = api.sample_sets().set_index("sample_set") + sample_count = df_sample_sets["sample_count"] + all_sample_sets = df_sample_sets.index.to_list() + + for sample_set in all_sample_sets: + # Call function to be tested. + df = api.cohorts_metadata(sample_sets=sample_set) + + # Check output. + validate_cohorts_metadata(df, has_cohorts_by_quarter=True) + expected_len = sample_count.loc[sample_set] + assert len(df) == expected_len + + def sample_metadata_expected_columns(has_aims, has_cohorts_by_quarter): expected_columns = general_metadata_expected_columns() if has_aims: @@ -421,6 +487,25 @@ def test_sample_metadata_with_query(ag3_sim_api): assert (df["country"] == "Burkina Faso").all() +def test_sample_metadata_with_indices(ag3_sim_api): + df_all = ag3_sim_api.sample_metadata() + query = "country == 'Burkina Faso'" + indices = np.nonzero(df_all.eval(query))[0].tolist() + df1 = ag3_sim_api.sample_metadata(sample_query=query) + df2 = ag3_sim_api.sample_metadata(sample_indices=indices) + validate_metadata( + df1, + sample_metadata_expected_columns(has_aims=True, has_cohorts_by_quarter=True), + ) + assert (df1["country"] == "Burkina Faso").all() + validate_metadata( + df2, + sample_metadata_expected_columns(has_aims=True, has_cohorts_by_quarter=True), + ) + assert (df2["country"] == "Burkina Faso").all() + assert_frame_equal(df1, df2) + + 
@parametrize_with_cases("fixture,api", cases=".") def test_sample_metadata_quarter(fixture, api: AnophelesSampleMetadata): df = api.sample_metadata() @@ -440,6 +525,33 @@ def test_sample_metadata_quarter(fixture, api: AnophelesSampleMetadata): assert (df.query("month in [10, 11, 12]")["quarter"] == 4).all() +def test_sample_metadata_with_missing_file( + missing_metadata_api: AnophelesSampleMetadata, +): + # In this test, one of the sample sets (AG1000G-BF-A) has a missing file. + # We expect this to be filled with empty values. + api = missing_metadata_api + + # Set up test. + df_sample_sets = api.sample_sets().set_index("sample_set") + sample_count = df_sample_sets["sample_count"] + all_sample_sets = df_sample_sets.index.to_list() + + for sample_set in all_sample_sets: + # Call function to be tested. + df = api.sample_metadata(sample_sets=sample_set) + + # Check output. + validate_metadata( + df, + sample_metadata_expected_columns( + has_aims=True, has_cohorts_by_quarter=True + ), + ) + expected_len = sample_count.loc[sample_set] + assert len(df) == expected_len + + @parametrize_with_cases("fixture,api", cases=".") def test_extra_metadata_errors(fixture, api): # Bad type. diff --git a/tests/anoph/test_snp_data.py b/tests/anoph/test_snp_data.py index e6011fa23..8e7ab22e3 100644 --- a/tests/anoph/test_snp_data.py +++ b/tests/anoph/test_snp_data.py @@ -127,12 +127,12 @@ def test_open_snp_genotypes(fixture, api: AnophelesSnpData): assert samples.shape[0] == n_samples assert samples.dtype.kind == "S" + # Check calldata arrays. for contig in api.contigs: assert contig in root contig_grp = root[contig] - # Check calldata arrays. 
- n_sites = fixture.n_sites[contig] + n_sites = fixture.n_snp_sites[contig] assert "calldata" in contig_grp calldata = contig_grp["calldata"] assert "GT" in calldata @@ -153,7 +153,7 @@ def test_open_snp_genotypes(fixture, api: AnophelesSnpData): assert ad.dtype == "i2" -def _check_site_filters(api: AnophelesSnpData, mask, region): +def check_site_filters(api: AnophelesSnpData, mask, region): filter_pass = api.site_filters(region=region, mask=mask) assert isinstance(filter_pass, da.Array) assert filter_pass.ndim == 1 @@ -165,19 +165,19 @@ def test_site_filters(fixture, api: AnophelesSnpData): for mask in api.site_mask_ids: # Test with contig. contig = fixture.random_contig() - _check_site_filters(api, mask=mask, region=contig) + check_site_filters(api, mask=mask, region=contig) # Test with region string. region = fixture.random_region_str() - _check_site_filters(api, mask=mask, region=region) + check_site_filters(api, mask=mask, region=region) # Test with genome feature ID. df_gff = api.genome_features(attributes=["ID"]) region = random.choice(df_gff["ID"].dropna().to_list()) - _check_site_filters(api, mask=mask, region=region) + check_site_filters(api, mask=mask, region=region) -def _check_snp_sites(api: AnophelesSnpData, region): +def check_snp_sites(api: AnophelesSnpData, region): pos = api.snp_sites(region=region, field="POS") ref = api.snp_sites(region=region, field="REF") alt = api.snp_sites(region=region, field="ALT") @@ -221,16 +221,16 @@ def _check_snp_sites(api: AnophelesSnpData, region): def test_snp_sites(fixture, api: AnophelesSnpData): # Test with contig. contig = fixture.random_contig() - _check_snp_sites(api=api, region=contig) + check_snp_sites(api=api, region=contig) # Test with region string. region = fixture.random_region_str() - _check_snp_sites(api=api, region=region) + check_snp_sites(api=api, region=region) # Test with genome feature ID. 
df_gff = api.genome_features(attributes=["ID"]) region = random.choice(df_gff["ID"].dropna().to_list()) - _check_snp_sites(api=api, region=region) + check_snp_sites(api=api, region=region) @parametrize_with_cases("fixture,api", cases=".") @@ -303,7 +303,7 @@ def test_site_annotations(fixture, api): ) -def _check_snp_genotypes(api, sample_sets, region): +def check_snp_genotypes(api, sample_sets, region): df_samples = api.sample_metadata(sample_sets=sample_sets) # Check default field (GT). @@ -377,9 +377,9 @@ def _check_snp_genotypes(api, sample_sets, region): @parametrize_with_cases("fixture,api", cases=".") -def test_snp_genotypes(fixture, api: AnophelesSnpData): - # Here we manually parametrize sample_sets and region, because - # parameters need to be determined at runtime. +def test_snp_genotypes_with_sample_sets_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + region = fixture.random_region_str() # Parametrize sample_sets. all_releases = api.releases @@ -391,6 +391,17 @@ def test_snp_genotypes(fixture, api: AnophelesSnpData): random.choice(all_releases), ] + # Run tests. + for sample_sets in parametrize_sample_sets: + check_snp_genotypes(api=api, sample_sets=sample_sets, region=region) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_genotypes_with_region_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + # Parametrize region. contig = fixture.random_contig() df_gff = api.genome_features(attributes=["ID"]) @@ -402,11 +413,11 @@ def test_snp_genotypes(fixture, api: AnophelesSnpData): ] # Run tests. 
- for sample_sets, region in product(parametrize_sample_sets, parametrize_region): - _check_snp_genotypes(api=api, sample_sets=sample_sets, region=region) + for region in parametrize_region: + check_snp_genotypes(api=api, sample_sets=sample_sets, region=region) -def _check_snp_calls(api, sample_sets, region, site_mask): +def check_snp_calls(api, sample_sets, region, site_mask): ds = api.snp_calls(region=region, sample_sets=sample_sets, site_mask=site_mask) assert isinstance(ds, xr.Dataset) @@ -494,13 +505,14 @@ def _check_snp_calls(api, sample_sets, region, site_mask): @parametrize_with_cases("fixture,api", cases=".") -def test_snp_calls(fixture, api: AnophelesSnpData): - # Here we manually parametrize sample_sets, region and site_mask, - # because parameters need to be determined at runtime. +def test_snp_calls_with_sample_sets_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + region = fixture.random_region_str() + site_mask = random.choice((None,) + api.site_mask_ids) # Parametrize sample_sets. - all_releases = api.releases all_sample_sets = api.sample_sets()["sample_set"].to_list() + all_releases = api.releases parametrize_sample_sets = [ None, random.choice(all_sample_sets), @@ -508,6 +520,20 @@ def test_snp_calls(fixture, api: AnophelesSnpData): random.choice(all_releases), ] + # Run tests. + for sample_sets in parametrize_sample_sets: + check_snp_calls( + api=api, sample_sets=sample_sets, region=region, site_mask=site_mask + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_calls_with_region_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + site_mask = random.choice((None,) + api.site_mask_ids) + # Parametrize region. 
contig = fixture.random_contig() df_gff = api.genome_features(attributes=["ID"]) @@ -518,14 +544,26 @@ def test_snp_calls(fixture, api: AnophelesSnpData): random.choice(df_gff["ID"].dropna().to_list()), ] + # Run tests. + for region in parametrize_region: + check_snp_calls( + api=api, sample_sets=sample_sets, region=region, site_mask=site_mask + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_calls_with_site_mask_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + # Parametrize site_mask. parametrize_site_mask = (None,) + api.site_mask_ids # Run tests. - for sample_sets, region, site_mask in product( - parametrize_sample_sets, parametrize_region, parametrize_site_mask - ): - _check_snp_calls( + for site_mask in parametrize_site_mask: + check_snp_calls( api=api, sample_sets=sample_sets, region=region, site_mask=site_mask ) @@ -534,7 +572,7 @@ def test_snp_calls(fixture, api: AnophelesSnpData): "sample_query", ["sex_call == 'F'", "taxon == 'coluzzii'", "taxon == 'robot'"], ) -def test_snp_calls_with_sample_query(ag3_sim_api: AnophelesSnpData, sample_query): +def test_snp_calls_with_sample_query_param(ag3_sim_api: AnophelesSnpData, sample_query): df_samples = ag3_sim_api.sample_metadata().query(sample_query) if len(df_samples) == 0: @@ -548,7 +586,7 @@ def test_snp_calls_with_sample_query(ag3_sim_api: AnophelesSnpData, sample_query @parametrize_with_cases("fixture,api", cases=".") -def test_snp_calls_with_min_cohort_size(fixture, api: AnophelesSnpData): +def test_snp_calls_with_min_cohort_size_param(fixture, api: AnophelesSnpData): # Randomly fix some input parameters. 
all_sample_sets = api.sample_sets()["sample_set"].to_list() sample_sets = random.choice(all_sample_sets) @@ -571,7 +609,7 @@ def test_snp_calls_with_min_cohort_size(fixture, api: AnophelesSnpData): @parametrize_with_cases("fixture,api", cases=".") -def test_snp_calls_with_max_cohort_size(fixture, api: AnophelesSnpData): +def test_snp_calls_with_max_cohort_size_param(fixture, api: AnophelesSnpData): # Randomly fix some input parameters. all_sample_sets = api.sample_sets()["sample_set"].to_list() sample_sets = random.choice(all_sample_sets) @@ -588,30 +626,27 @@ def test_snp_calls_with_max_cohort_size(fixture, api: AnophelesSnpData): @parametrize_with_cases("fixture,api", cases=".") -def test_snp_calls_with_cohort_size(fixture, api: AnophelesSnpData): +def test_snp_calls_with_cohort_size_param(fixture, api: AnophelesSnpData): # Randomly fix some input parameters. all_sample_sets = api.sample_sets()["sample_set"].to_list() sample_sets = random.choice(all_sample_sets) - n_samples = len(api.sample_metadata(sample_sets=sample_sets)) region = fixture.random_region_str() # Test with specific cohort size. 
- cohort_size = 20 - if n_samples < 20: - with pytest.raises(ValueError): - api.snp_calls( - sample_sets=sample_sets, - region=region, - cohort_size=cohort_size, - ) - else: - ds = api.snp_calls( + cohort_size = random.randint(1, 10) + ds = api.snp_calls( + sample_sets=sample_sets, + region=region, + cohort_size=cohort_size, + ) + assert isinstance(ds, xr.Dataset) + assert ds.dims["samples"] == cohort_size + with pytest.raises(ValueError): + api.snp_calls( sample_sets=sample_sets, region=region, - cohort_size=cohort_size, + cohort_size=1_000, ) - assert isinstance(ds, xr.Dataset) - assert ds.dims["samples"] == 20 @pytest.mark.parametrize( @@ -629,13 +664,13 @@ def test_snp_calls_with_cohort_size(fixture, api: AnophelesSnpData): "INTERGENIC", ], ) -def test_snp_calls_with_site_class(ag3_sim_api: AnophelesSnpData, site_class): +def test_snp_calls_with_site_class_param(ag3_sim_api: AnophelesSnpData, site_class): ds1 = ag3_sim_api.snp_calls(region="3L") ds2 = ag3_sim_api.snp_calls(region="3L", site_class=site_class) assert ds2.dims["variants"] < ds1.dims["variants"] -def _check_snp_allele_counts(api, region, sample_sets, sample_query, site_mask): +def check_snp_allele_counts(api, region, sample_sets, sample_query, site_mask): df_samples = api.sample_metadata(sample_sets=sample_sets, sample_query=sample_query) n_samples = len(df_samples) @@ -664,10 +699,14 @@ def _check_snp_allele_counts(api, region, sample_sets, sample_query, site_mask): @parametrize_with_cases("fixture,api", cases=".") -def test_snp_allele_counts(fixture, api): +def test_snp_allele_counts_with_sample_sets_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + region = fixture.random_region_str() + site_mask = random.choice((None,) + api.site_mask_ids) + # Parametrize sample_sets. 
- all_releases = api.releases all_sample_sets = api.sample_sets()["sample_set"].to_list() + all_releases = api.releases parametrize_sample_sets = [ None, random.choice(all_sample_sets), @@ -675,6 +714,24 @@ def test_snp_allele_counts(fixture, api): random.choice(all_releases), ] + # Run tests. + for sample_sets in parametrize_sample_sets: + check_snp_allele_counts( + api=api, + sample_sets=sample_sets, + region=region, + site_mask=site_mask, + sample_query=None, + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_allele_counts_with_region_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + site_mask = random.choice((None,) + api.site_mask_ids) + # Parametrize region. contig = fixture.random_contig() df_gff = api.genome_features(attributes=["ID"]) @@ -685,20 +742,52 @@ def test_snp_allele_counts(fixture, api): random.choice(df_gff["ID"].dropna().to_list()), ] + # Run tests. + for region in parametrize_region: + check_snp_allele_counts( + api=api, + sample_sets=sample_sets, + region=region, + site_mask=site_mask, + sample_query=None, + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_allele_counts_with_site_mask_param(fixture, api: AnophelesSnpData): + # Fixed parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + # Parametrize site_mask. - parametrize_site_mask = (None, random.choice(api.site_mask_ids)) + parametrize_site_mask = (None,) + api.site_mask_ids + + # Run tests. + for site_mask in parametrize_site_mask: + check_snp_allele_counts( + api=api, + sample_sets=sample_sets, + region=region, + site_mask=site_mask, + sample_query=None, + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_allele_counts_with_sample_query_param(fixture, api: AnophelesSnpData): + # Fixed parameters. 
+ all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + region = fixture.random_region_str() + site_mask = random.choice((None,) + api.site_mask_ids) # Parametrize sample_query. - parametrize_sample_query = [None, "sex_call == 'F'"] + parametrize_sample_query = (None, "sex_call == 'F'") # Run tests. - for sample_sets, region, site_mask, sample_query in product( - parametrize_sample_sets, - parametrize_region, - parametrize_site_mask, - parametrize_sample_query, - ): - _check_snp_allele_counts( + for sample_query in parametrize_sample_query: + check_snp_allele_counts( api=api, sample_sets=sample_sets, region=region, diff --git a/tests/test_af1.py b/tests/test_af1.py index 3f4b74415..367bdb279 100644 --- a/tests/test_af1.py +++ b/tests/test_af1.py @@ -1,6 +1,5 @@ import shutil -import dask.array as da import numpy as np import pandas as pd import pytest @@ -169,169 +168,6 @@ def test_snp_allele_frequencies__query(): assert len(df) == 1309 -# TODO: Af1.0 CNV tests will go here, with test_haplotypes underneath - - -@pytest.mark.parametrize( - "sample_sets", - [ - "1229-VO-GH-DADZIE-VMF00095", - ("1240-VO-CD-KOEKEMOER-VMF00099", "1240-VO-MZ-KOEKEMOER-VMF00101"), - "1.0", - None, - ], -) -@pytest.mark.parametrize( - "region", ["3RL", ["2RL:48,714,463-48,715,355", "LOC125761549_t7"]] -) -def test_haplotypes(sample_sets, region): - af1 = setup_af1() - - # check expected samples - df_samples = af1.sample_metadata(sample_sets=sample_sets) - expected_samples = df_samples["sample_id"].tolist() - n_samples = len(expected_samples) - - # check if any samples - if n_samples == 0: - ds = af1.haplotypes(region=region, sample_sets=sample_sets) - assert ds is None - return - - ds = af1.haplotypes(region=region, sample_sets=sample_sets) - assert isinstance(ds, xr.Dataset) - - # check fields - expected_data_vars = { - "variant_allele", - "call_genotype", - } - assert set(ds.data_vars) == expected_data_vars - - expected_coords = { - 
"variant_contig", - "variant_position", - "sample_id", - } - assert set(ds.coords) == expected_coords - - # check dimensions - assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} - - # check samples - samples = ds["sample_id"].values - assert set(samples) == set(expected_samples) - - # check dim lengths - assert ds.dims["samples"] == n_samples - assert ds.dims["ploidy"] == 2 - assert ds.dims["alleles"] == 2 - - # check shapes - for f in expected_coords | expected_data_vars: - x = ds[f] - assert isinstance(x, xr.DataArray) - assert isinstance(x.data, da.Array) - - if f == "variant_allele": - assert x.ndim == 2 - assert x.shape[1] == 2 - assert x.dims == ("variants", "alleles") - elif f.startswith("variant_"): - assert x.ndim == 1 - assert x.dims == ("variants",) - elif f == "call_genotype": - assert x.ndim == 3 - assert x.dims == ("variants", "samples", "ploidy") - assert x.shape[1] == n_samples - assert x.shape[2] == 2 - - # check attributes - assert "contigs" in ds.attrs - assert ds.attrs["contigs"] == ("2RL", "3RL", "X") - - # check can set up computations - d1 = ds["variant_position"] > 10_000 - assert isinstance(d1, xr.DataArray) - d2 = ds["call_genotype"].sum(axis=(1, 2)) - assert isinstance(d2, xr.DataArray) - - -@pytest.mark.parametrize( - "sample_query", - [ - "location == 'Dimabi'", - "location == 'Gbullung'", - ], -) -def test_haplotypes__sample_query(sample_query): - sample_sets = "1229-VO-GH-DADZIE-VMF00095" - region = "3RL" - - af1 = setup_af1() - - # check expected samples - df_samples = af1.sample_metadata(sample_sets=sample_sets) - expected_samples = df_samples.query(sample_query)["sample_id"].tolist() - n_samples = len(expected_samples) - - ds = af1.haplotypes( - region=region, - sample_sets=sample_sets, - sample_query=sample_query, - ) - assert isinstance(ds, xr.Dataset) - - # check fields - expected_data_vars = { - "variant_allele", - "call_genotype", - } - assert set(ds.data_vars) == expected_data_vars - - expected_coords = { - 
"variant_contig", - "variant_position", - "sample_id", - } - assert set(ds.coords) == expected_coords - - # check dimensions - assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} - - # check samples - samples = ds["sample_id"].values - assert set(samples) == set(expected_samples) - - # check dim lengths - assert ds.dims["samples"] == n_samples - assert ds.dims["ploidy"] == 2 - assert ds.dims["alleles"] == 2 - - # check shapes - for f in expected_coords | expected_data_vars: - x = ds[f] - assert isinstance(x, xr.DataArray) - assert isinstance(x.data, da.Array) - - if f == "variant_allele": - assert x.ndim == 2 - assert x.shape[1] == 2 - assert x.dims == ("variants", "alleles") - elif f.startswith("variant_"): - assert x.ndim == 1 - assert x.dims == ("variants",) - elif f == "call_genotype": - assert x.ndim == 3 - assert x.dims == ("variants", "samples", "ploidy") - assert x.shape[1] == n_samples - assert x.shape[2] == 2 - - # check attributes - assert "contigs" in ds.attrs - assert ds.attrs["contigs"] == ("2RL", "3RL", "X") - - @pytest.mark.parametrize( "region_raw", [ @@ -343,6 +179,7 @@ def test_haplotypes__sample_query(sample_query): ], ) def test_locate_region(region_raw): + # TODO Migrate this test. 
af1 = setup_af1() gene_annotation = af1.geneset(attributes=["ID"]) region = resolve_region(af1, region_raw) diff --git a/tests/test_ag3.py b/tests/test_ag3.py index cca73803a..283f0f225 100644 --- a/tests/test_ag3.py +++ b/tests/test_ag3.py @@ -1284,174 +1284,6 @@ def test_gene_cnv_frequencies__missing_samples(): assert isinstance(df, pd.DataFrame) -@pytest.mark.parametrize( - "sample_sets", - ["AG1000G-BF-A", ("AG1000G-TZ", "AG1000G-UG"), "3.0", None], -) -@pytest.mark.parametrize("region", ["3L", ["2R:48,714,463-48,715,355", "AGAP007280"]]) -@pytest.mark.parametrize("analysis", ["arab", "gamb_colu", "gamb_colu_arab"]) -def test_haplotypes(sample_sets, region, analysis): - ag3 = setup_ag3() - - # check expected samples - phased_samples_query = None - if analysis == "arab": - phased_samples_query = ( - "aim_species == 'arabiensis' and sample_set != 'AG1000G-X'" - ) - elif analysis == "gamb_colu": - phased_samples_query = ( - "aim_species in ['gambiae', 'coluzzii', 'intermediate_gambiae_coluzzii'] and " - "sample_set != 'AG1000G-X'" - ) - elif analysis == "gamb_colu_arab": - phased_samples_query = "sample_set != 'AG1000G-X'" - df_samples = ag3.sample_metadata(sample_sets=sample_sets) - expected_samples = df_samples.query(phased_samples_query)["sample_id"].tolist() - n_samples = len(expected_samples) - - # check if any samples - if n_samples == 0: - ds = ag3.haplotypes(region=region, sample_sets=sample_sets, analysis=analysis) - assert ds is None - return - - ds = ag3.haplotypes(region=region, sample_sets=sample_sets, analysis=analysis) - assert isinstance(ds, xr.Dataset) - - # check fields - expected_data_vars = { - "variant_allele", - "call_genotype", - } - assert set(ds.data_vars) == expected_data_vars - - expected_coords = { - "variant_contig", - "variant_position", - "sample_id", - } - assert set(ds.coords) == expected_coords - - # check dimensions - assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} - - # check samples - samples = 
ds["sample_id"].values - assert set(samples) == set(expected_samples) - - # check dim lengths - assert ds.dims["samples"] == n_samples - assert ds.dims["ploidy"] == 2 - assert ds.dims["alleles"] == 2 - - # check shapes - for f in expected_coords | expected_data_vars: - x = ds[f] - assert isinstance(x, xr.DataArray) - assert isinstance(x.data, da.Array) - - if f == "variant_allele": - assert x.ndim == 2 - assert x.shape[1] == 2 - assert x.dims == ("variants", "alleles") - elif f.startswith("variant_"): - assert x.ndim == 1 - assert x.dims == ("variants",) - elif f == "call_genotype": - assert x.ndim == 3 - assert x.dims == ("variants", "samples", "ploidy") - assert x.shape[1] == n_samples - assert x.shape[2] == 2 - - # check attributes - assert "contigs" in ds.attrs - assert ds.attrs["contigs"] == ("2R", "2L", "3R", "3L", "X") - - # check can set up computations - d1 = ds["variant_position"] > 10_000 - assert isinstance(d1, xr.DataArray) - d2 = ds["call_genotype"].sum(axis=(1, 2)) - assert isinstance(d2, xr.DataArray) - - -@pytest.mark.parametrize( - "sample_query", - [ - "taxon == 'coluzzii' and location == 'Bana Village'", - "taxon == 'gambiae' and location == 'Pala'", - ], -) -def test_haplotypes__sample_query(sample_query): - sample_sets = "AG1000G-BF-B" - region = "3L" - analysis = "gamb_colu_arab" - - ag3 = setup_ag3() - - # check expected samples - df_samples = ag3.sample_metadata(sample_sets=sample_sets) - expected_samples = df_samples.query(sample_query)["sample_id"].tolist() - n_samples = len(expected_samples) - - ds = ag3.haplotypes( - region=region, - sample_sets=sample_sets, - analysis=analysis, - sample_query=sample_query, - ) - assert isinstance(ds, xr.Dataset) - - # check fields - expected_data_vars = { - "variant_allele", - "call_genotype", - } - assert set(ds.data_vars) == expected_data_vars - - expected_coords = { - "variant_contig", - "variant_position", - "sample_id", - } - assert set(ds.coords) == expected_coords - - # check dimensions - assert 
set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} - - # check samples - samples = ds["sample_id"].values - assert set(samples) == set(expected_samples) - - # check dim lengths - assert ds.dims["samples"] == n_samples - assert ds.dims["ploidy"] == 2 - assert ds.dims["alleles"] == 2 - - # check shapes - for f in expected_coords | expected_data_vars: - x = ds[f] - assert isinstance(x, xr.DataArray) - assert isinstance(x.data, da.Array) - - if f == "variant_allele": - assert x.ndim == 2 - assert x.shape[1] == 2 - assert x.dims == ("variants", "alleles") - elif f.startswith("variant_"): - assert x.ndim == 1 - assert x.dims == ("variants",) - elif f == "call_genotype": - assert x.ndim == 3 - assert x.dims == ("variants", "samples", "ploidy") - assert x.shape[1] == n_samples - assert x.shape[2] == 2 - - # check attributes - assert "contigs" in ds.attrs - assert ds.attrs["contigs"] == ("2R", "2L", "3R", "3L", "X") - - @pytest.mark.parametrize( "region_raw", [ @@ -1463,6 +1295,7 @@ def test_haplotypes__sample_query(sample_query): ], ) def test_locate_region(region_raw): + # TODO Migrate this test. 
ag3 = setup_ag3() gene_annotation = ag3.genome_features(attributes=["ID"]) region = resolve_region(ag3, region_raw) diff --git a/tests/test_anopheles.py b/tests/test_anopheles.py index 3d33b8095..9b2ca7bd1 100644 --- a/tests/test_anopheles.py +++ b/tests/test_anopheles.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd import pytest -import xarray as xr from numpy.testing import assert_allclose from pandas.testing import assert_frame_equal @@ -341,46 +340,6 @@ def test_haplotype_joint_frequencies(): assert_allclose(vals, np.array([0, 0, 0, 0, 0.04, 0.16])) -@pytest.mark.parametrize( - "subclass, sample_sets, region, analysis, cohort_size", - [ - (Ag3, "AG1000G-BF-B", "3L", "gamb_colu_arab", 10), - (Af1, "1229-VO-GH-DADZIE-VMF00095", "3RL", "funestus", 10), - ], -) -def test_haplotypes__cohort_size(subclass, sample_sets, region, analysis, cohort_size): - anoph = setup_subclass_cached(subclass) - - ds = anoph.haplotypes( - region=region, - sample_sets=sample_sets, - analysis=analysis, - cohort_size=cohort_size, - ) - assert isinstance(ds, xr.Dataset) - - # check fields - expected_data_vars = { - "variant_allele", - "call_genotype", - } - assert set(ds.data_vars) == expected_data_vars - - expected_coords = { - "variant_contig", - "variant_position", - "sample_id", - } - assert set(ds.coords) == expected_coords - - # check dimensions - assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} - - # check dim lengths - assert ds.dims["samples"] == cohort_size - assert ds.dims["alleles"] == 2 - - @pytest.mark.parametrize( "subclass, sample_query, contig, analysis, sample_sets", [