From b61db654c94b2ebbdab2b9c24b1b38acaba3776f Mon Sep 17 00:00:00 2001 From: chris Date: Thu, 7 Oct 2021 15:18:04 +0100 Subject: [PATCH] new cohort analysis is default --- malariagen_data/ag3.py | 6 +++--- tests/test_ag3.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index fa4448407..ce1031c78 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -771,7 +771,7 @@ def snp_allele_frequencies( self, transcript, cohorts, - cohorts_analysis="20210702", + cohorts_analysis="20210927", min_cohort_size=10, site_mask=None, site_filters="dt_20200416", @@ -1661,7 +1661,7 @@ def gene_cnv_frequencies( self, contig, cohorts, - cohorts_analysis="20210702", + cohorts_analysis="20210927", min_cohort_size=10, species_calls=("20200422", "aim"), sample_sets="v3_wild", @@ -1962,7 +1962,7 @@ def _read_cohort_metadata(self, *, sample_set, cohorts_analysis): self._cache_cohort_metadata[(sample_set, cohorts_analysis)] = df return df - def sample_cohorts(self, sample_sets="v3_wild", cohorts_analysis="20210702"): + def sample_cohorts(self, sample_sets="v3_wild", cohorts_analysis="20210927"): """Access cohorts metadata for one or more sample sets. Parameters diff --git a/tests/test_ag3.py b/tests/test_ag3.py index 2089618b3..5e45fd3bd 100644 --- a/tests/test_ag3.py +++ b/tests/test_ag3.py @@ -617,20 +617,20 @@ def test_snp_allele_frequencies__str_cohorts(): df = ag3.snp_allele_frequencies( transcript="AGAP004707-RD", cohorts=cohorts, - cohorts_analysis="20210702", + cohorts_analysis="20210927", min_cohort_size=10, site_mask="gamb_colu", sample_sets="v3_wild", drop_invariant=True, ) - df_coh = ag3.sample_cohorts(sample_sets="v3_wild", cohorts_analysis="20210702") + df_coh = ag3.sample_cohorts(sample_sets="v3_wild", cohorts_analysis="20210927") coh_nm = "cohort_" + cohorts all_uni = df_coh[coh_nm].dropna().unique().tolist() expected_fields = universal_fields + all_uni + ["max_af"] assert df.columns.tolist() == expected_fields assert isinstance(df, pd.DataFrame) - assert df.shape == (16639, 101) + assert df.shape == (16524, 103) def test_snp_allele_frequencies__dict_cohorts(): @@ -1049,7 +1049,7 @@ def test_gene_cnv_frequencies(contig, cohorts): cohort_labels = cohorts.keys() if isinstance(cohorts, str): df_coh = ag3.sample_cohorts( - sample_sets="v3_wild", cohorts_analysis="20210702" + sample_sets="v3_wild", cohorts_analysis="20210927" ) coh_nm = "cohort_" + cohorts cohort_labels = list(df_coh[coh_nm].dropna().unique()) @@ -1173,7 +1173,7 @@ def test_sample_cohorts(sample_sets): ) ag3 = setup_ag3() - df_coh = ag3.sample_cohorts(sample_sets=sample_sets, cohorts_analysis="20210702") + df_coh = ag3.sample_cohorts(sample_sets=sample_sets, cohorts_analysis="20210927") df_meta = ag3.sample_metadata(sample_sets=sample_sets) assert tuple(df_coh.columns) == expected_cols