Skip to content

Commit

Permalink
Refactor Anopheles tests on genome sequence, genome features and sample metadata (#399)
Browse files Browse the repository at this point in the history

* remove dup tests

* refactor fixtures

* fix tests

* remove dup tests

* update poetry

* remove dup tests

* sample metadata test refactoring

* refactor snp_data tests

* refactor sample metadata tests
  • Loading branch information
alimanfoo authored May 19, 2023
1 parent 8da7b43 commit bd62d00
Show file tree
Hide file tree
Showing 8 changed files with 371 additions and 787 deletions.
28 changes: 14 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

128 changes: 86 additions & 42 deletions tests/anoph/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import string
from pathlib import Path
from random import choice, choices, randint
from typing import Any, Dict, Tuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -495,15 +496,25 @@ def simulate_site_annotations(path, genome):
zarr.consolidate_metadata(path)


class Ag3Simulator:
def __init__(self, fixture_dir):
class AnophelesSimulator:
def __init__(
self,
fixture_dir: Path,
bucket: str,
releases: Tuple[str, ...],
has_aims: bool,
has_cohorts_by_quarter: bool,
):
self.fixture_dir = fixture_dir
self.bucket = "vo_agam_release"
self.bucket = bucket
self.bucket_path = (self.fixture_dir / "simulated" / self.bucket).resolve()
self.results_cache_path = (
self.fixture_dir / "simulated" / "ag3_results_cache"
self.fixture_dir / "simulated" / "results_cache"
).resolve()
self.url = self.bucket_path.as_uri()
self.releases = releases
self.has_aims = has_aims
self.has_cohorts_by_quarter = has_cohorts_by_quarter

# Clear out the fixture directories.
shutil.rmtree(self.bucket_path, ignore_errors=True)
Expand All @@ -512,9 +523,14 @@ def __init__(self, fixture_dir):
# Ensure the fixture directory exists.
self.bucket_path.mkdir(parents=True, exist_ok=True)

# These members are to be overridden/populated in subclasses.
self.config: Dict[str, Any] = dict()
self.contig_sizes: Dict[str, int] = dict()
self.release_manifests: Dict[str, pd.DataFrame] = dict()
self.genome = None
self.genome_features = None

# Create fixture data.
self.releases = ("3.0", "3.1")
self.release_manifests = dict()
self.init_config()
self.init_public_release_manifest()
self.init_pre_release_manifest()
Expand All @@ -526,6 +542,62 @@ def __init__(self, fixture_dir):
self.init_snp_genotypes()
self.init_site_annotations()

@property
def contigs(self) -> Tuple[str, ...]:
    """Contig names configured for this simulated data resource.

    Returns
    -------
    Tuple[str, ...]
        The ``"CONTIGS"`` entry of ``self.config`` converted to a tuple,
        preserving the order in which it is stored.

    Raises
    ------
    KeyError
        If ``self.config`` has no ``"CONTIGS"`` entry, e.g. while it is
        still an empty dict.
    """
    return tuple(self.config["CONTIGS"])

def random_contig(self) -> str:
    """Return one contig name chosen at random from ``self.contigs``.

    Raises
    ------
    IndexError
        If ``self.contigs`` is empty (``random.choice`` cannot pick from
        an empty sequence).
    """
    return choice(self.contigs)

def random_region_str(self) -> str:
    """Return a random region string of the form ``"contig:start-end"``.

    The contig comes from ``self.random_contig()`` and its length is looked
    up in ``self.contig_sizes``. ``start`` is drawn from ``[1, size]`` and
    ``end`` from ``[start, size]`` (both bounds inclusive), so the region
    always lies within the contig and ``start <= end``.

    Both coordinates are rendered with thousands separators, e.g.
    ``"2L:1,234-5,678"``.

    Raises
    ------
    KeyError
        If the chosen contig has no entry in ``self.contig_sizes``.
    """
    contig = self.random_contig()
    contig_size = self.contig_sizes[contig]
    # randint is inclusive at both ends; drawing the end from
    # [start, size] guarantees a non-inverted region.
    region_start = randint(1, contig_size)
    region_end = randint(region_start, contig_size)
    return f"{contig}:{region_start:,}-{region_end:,}"

# Fixture-creation hooks.
#
# Each base implementation below deliberately does nothing, so a subclass
# only has to override the steps that apply to its data resource and the
# remaining steps are harmless no-ops.

def init_config(self) -> None:
    """Hook for the configuration step; no-op in the base class."""


def init_public_release_manifest(self) -> None:
    """Hook for the public release manifest step; no-op in the base class."""


def init_pre_release_manifest(self) -> None:
    """Hook for the pre-release manifest step; no-op in the base class."""


def init_genome_sequence(self) -> None:
    """Hook for the genome sequence step; no-op in the base class."""


def init_genome_features(self) -> None:
    """Hook for the genome features step; no-op in the base class."""


def init_metadata(self) -> None:
    """Hook for the metadata step; no-op in the base class."""


def init_snp_sites(self) -> None:
    """Hook for the SNP sites step; no-op in the base class."""


def init_site_filters(self) -> None:
    """Hook for the site filters step; no-op in the base class."""


def init_snp_genotypes(self) -> None:
    """Hook for the SNP genotypes step; no-op in the base class."""


def init_site_annotations(self) -> None:
    """Hook for the site annotations step; no-op in the base class."""


class Ag3Simulator(AnophelesSimulator):
def __init__(self, fixture_dir: Path) -> None:
    """Initialise the simulator with the Ag3 settings.

    Parameters
    ----------
    fixture_dir : Path
        Forwarded unchanged to the ``AnophelesSimulator`` constructor.

    The remaining constructor arguments are fixed here: bucket
    ``"vo_agam_release"``, releases ``("3.0", "3.1")``, and both
    ``has_aims`` and ``has_cohorts_by_quarter`` set to ``True``.
    """
    super().__init__(
        fixture_dir=fixture_dir,
        bucket="vo_agam_release",
        releases=("3.0", "3.1"),
        has_aims=True,
        has_cohorts_by_quarter=True,
    )

def init_config(self):
self.config = {
"PUBLIC_RELEASES": ["3.0"],
Expand All @@ -547,10 +619,6 @@ def init_config(self):
with config_path.open(mode="w") as f:
json.dump(self.config, f, indent=4)

@property
def contigs(self):
return tuple(self.config["CONTIGS"])

def init_public_release_manifest(self):
# Here we create a release manifest for an Ag3-style
# public release. Note this is not the exact same data
Expand Down Expand Up @@ -821,35 +889,15 @@ def init_site_annotations(self):
simulate_site_annotations(path=path, genome=self.genome)


class Af1Simulator:
class Af1Simulator(AnophelesSimulator):
def __init__(self, fixture_dir):
self.fixture_dir = fixture_dir
self.bucket = "vo_afun_release"
self.bucket_path = (self.fixture_dir / "simulated" / self.bucket).resolve()
self.url = self.bucket_path.as_uri()
self.results_cache_path = (
self.fixture_dir / "simulated" / "af1_results_cache"
).resolve()

# Clear out the fixture directories.
shutil.rmtree(self.bucket_path, ignore_errors=True)
shutil.rmtree(self.results_cache_path, ignore_errors=True)

# Ensure the fixture directory exists.
self.bucket_path.mkdir(parents=True, exist_ok=True)

# Create fixture data.
self.releases = ("1.0",)
self.release_manifests = dict()
self.init_config()
self.init_public_release_manifest()
self.init_genome_sequence()
self.init_genome_features()
self.init_metadata()
self.init_snp_sites()
self.init_site_filters()
self.init_snp_genotypes()
self.init_site_annotations()
super().__init__(
fixture_dir=fixture_dir,
bucket="vo_afun_release",
releases=("1.0",),
has_aims=False,
has_cohorts_by_quarter=False,
)

def init_config(self):
self.config = {
Expand All @@ -871,10 +919,6 @@ def init_config(self):
with config_path.open(mode="w") as f:
json.dump(self.config, f, indent=4)

@property
def contigs(self):
return tuple(self.config["CONTIGS"])

def init_public_release_manifest(self):
# Here we create a release manifest for an Af1-style
# public release. Note this is not the exact same data
Expand Down
29 changes: 20 additions & 9 deletions tests/anoph/test_genome_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from malariagen_data import af1 as _af1
from malariagen_data import ag3 as _ag3
from malariagen_data.anoph.genome_features import AnophelesGenomeFeaturesData
from malariagen_data.util import Region, resolve_region


@pytest.fixture
Expand Down Expand Up @@ -99,19 +100,29 @@ def test_genome_features_region_contig(fixture, api: AnophelesGenomeFeaturesData

@parametrize_with_cases("fixture,api", cases=".")
def test_genome_features_region_string(fixture, api: AnophelesGenomeFeaturesData):
for contig in fixture.contigs:
contig_size = fixture.contig_sizes[contig]
region_start = random.randint(1, contig_size)
region_end = random.randint(region_start, contig_size)
region = f"{contig}:{region_start:,}-{region_end:,}"
parametrize_region = [
# Single contig.
fixture.random_contig(),
# List of contigs.
[fixture.random_contig(), fixture.random_contig()],
# Single region.
fixture.random_region_str(),
# List of regions.
[fixture.random_region_str(), fixture.random_region_str()],
]

for region in parametrize_region:
df_gf = api.genome_features(region=region, attributes=None)
expected_cols = gff3_cols + ["attributes"]
assert df_gf.columns.to_list() == expected_cols
# N.B., it's possible that the region overlaps no features.
if len(df_gf) > 0:
assert (df_gf["contig"] == contig).all()
assert (df_gf["end"] >= region_start).all()
assert (df_gf["start"] <= region_end).all()
r = resolve_region(api, region)
if len(df_gf) > 0 and isinstance(r, Region):
assert (df_gf["contig"] == r.contig).all()
if r.start is not None:
assert (df_gf["end"] >= r.start).all()
if r.end is not None:
assert (df_gf["start"] <= r.end).all()


@parametrize_with_cases("fixture,api", cases=".")
Expand Down
Loading

0 comments on commit bd62d00

Please sign in to comment.