Skip to content

Commit

Permalink
Merge pull request #546 from malariagen/site-mask-default-alimanfoo-2024-06-04
Browse files Browse the repository at this point in the history

Ensure a site mask is used by default for Anopheles popgen functions
  • Loading branch information
leehart authored Jun 7, 2024
2 parents dc89c9c + 3e2ab61 commit f14f745
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 72 deletions.
5 changes: 2 additions & 3 deletions malariagen_data/anoph/dipclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from ..plotly_dendrogram import plot_dendrogram
from . import base_params, plotly_params, tree_params, dipclust_params
from .base_params import DEFAULT
from .snp_data import AnophelesSnpData


Expand All @@ -39,7 +38,7 @@ def __init__(
def plot_diplotype_clustering(
self,
region: base_params.regions,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
cohort_size: Optional[base_params.cohort_size] = None,
Expand Down Expand Up @@ -179,7 +178,7 @@ def plot_diplotype_clustering(
def diplotype_pairwise_distances(
self,
region: base_params.regions,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
site_class: Optional[base_params.site_class] = None,
Expand Down
11 changes: 5 additions & 6 deletions malariagen_data/anoph/fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import plotly.express as px

from .snp_data import AnophelesSnpData
from .base_params import DEFAULT
from . import base_params, fst_params, gplt_params, plotly_params
from ..util import CacheMiss, check_types

Expand Down Expand Up @@ -105,7 +104,7 @@ def fst_gwss(
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
sample_sets: Optional[base_params.sample_sets] = None,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
cohort_size: Optional[base_params.cohort_size] = fst_params.cohort_size_default,
min_cohort_size: Optional[
base_params.min_cohort_size
Expand Down Expand Up @@ -160,7 +159,7 @@ def plot_fst_gwss_track(
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
sample_sets: Optional[base_params.sample_sets] = None,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
cohort_size: Optional[base_params.cohort_size] = fst_params.cohort_size_default,
min_cohort_size: Optional[
base_params.min_cohort_size
Expand Down Expand Up @@ -262,7 +261,7 @@ def plot_fst_gwss(
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
sample_sets: Optional[base_params.sample_sets] = None,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
cohort_size: Optional[base_params.cohort_size] = fst_params.cohort_size_default,
min_cohort_size: Optional[
base_params.min_cohort_size
Expand Down Expand Up @@ -350,7 +349,7 @@ def average_fst(
base_params.max_cohort_size
] = fst_params.max_cohort_size_default,
n_jack: base_params.n_jack = 200,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
site_class: Optional[base_params.site_class] = None,
random_seed: base_params.random_seed = 42,
) -> Tuple[float, float]:
Expand Down Expand Up @@ -416,7 +415,7 @@ def pairwise_average_fst(
base_params.max_cohort_size
] = fst_params.max_cohort_size_default,
n_jack: base_params.n_jack = 200,
site_mask: base_params.site_mask = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
site_class: Optional[base_params.site_class] = None,
random_seed: base_params.random_seed = 42,
) -> fst_params.df_pairwise_fst:
Expand Down
21 changes: 10 additions & 11 deletions malariagen_data/anoph/g123.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from .hap_data import AnophelesHapData
from ..util import hash_columns, check_types, CacheMiss
from . import base_params
from .base_params import DEFAULT
from . import g123_params, gplt_params


Expand Down Expand Up @@ -150,8 +149,8 @@ def g123_gwss(
self,
contig: base_params.contig,
window_size: g123_params.window_size,
sites: g123_params.sites = DEFAULT,
site_mask: Optional[base_params.site_mask] = DEFAULT,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
min_cohort_size: Optional[
Expand All @@ -168,7 +167,7 @@ def g123_gwss(
# invalidate any previously cached data.
name = "g123_gwss_v1"

if sites == DEFAULT:
if sites == base_params.DEFAULT:
assert self._default_phasing_analysis is not None
sites = self._default_phasing_analysis
valid_sites = self.phasing_analysis_ids + ("all", "segregating")
Expand Down Expand Up @@ -252,8 +251,8 @@ def _g123_calibration(
def g123_calibration(
self,
contig: base_params.contig,
sites: g123_params.sites = DEFAULT,
site_mask: Optional[base_params.site_mask] = DEFAULT,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
sample_sets: Optional[base_params.sample_sets] = None,
min_cohort_size: Optional[
Expand Down Expand Up @@ -305,8 +304,8 @@ def plot_g123_gwss_track(
self,
contig: base_params.contig,
window_size: g123_params.window_size,
sites: g123_params.sites = DEFAULT,
site_mask: Optional[base_params.site_mask] = DEFAULT,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
min_cohort_size: Optional[
Expand Down Expand Up @@ -406,8 +405,8 @@ def plot_g123_gwss(
self,
contig: base_params.contig,
window_size: g123_params.window_size,
sites: g123_params.sites = DEFAULT,
site_mask: Optional[base_params.site_mask] = DEFAULT,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
min_cohort_size: Optional[
Expand Down Expand Up @@ -484,7 +483,7 @@ def plot_g123_calibration(
self,
contig: base_params.contig,
sites: g123_params.sites,
site_mask: Optional[base_params.site_mask] = DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
sample_sets: Optional[base_params.sample_sets] = None,
min_cohort_size: Optional[
Expand Down
7 changes: 3 additions & 4 deletions malariagen_data/anoph/genome_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
unpack_gff3_attributes,
)
from . import base_params, gplt_params
from .base_params import DEFAULT
from .genome_sequence import AnophelesGenomeSequenceData


Expand Down Expand Up @@ -109,7 +108,7 @@ def _prep_gff_attributes(
) -> Tuple[str, ...]:
if attributes is None:
attributes_normed: Tuple[str, ...] = ()
elif attributes == DEFAULT:
elif attributes == base_params.DEFAULT:
attributes_normed = self._gff_default_attributes
elif isinstance(attributes, str):
attributes_normed = (attributes,)
Expand All @@ -125,7 +124,7 @@ def _prep_gff_attributes(
def genome_features(
self,
region: Optional[base_params.regions] = None,
attributes: base_params.gff_attributes = DEFAULT,
attributes: base_params.gff_attributes = base_params.DEFAULT,
) -> pd.DataFrame:
debug = self._log.debug

Expand Down Expand Up @@ -159,7 +158,7 @@ def genome_features(
)

def genome_feature_children(
self, parent: str, attributes: base_params.gff_attributes = DEFAULT
self, parent: str, attributes: base_params.gff_attributes = base_params.DEFAULT
) -> pd.DataFrame:
# Normalise attributes and ensure Parent is included.
attributes_normed = self._prep_gff_attributes(attributes)
Expand Down
11 changes: 5 additions & 6 deletions malariagen_data/anoph/h12.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from .hap_data import AnophelesHapData
from ..util import hash_columns, check_types, CacheMiss
from . import base_params
from .base_params import DEFAULT
from . import h12_params, gplt_params, hap_params


Expand Down Expand Up @@ -70,7 +69,7 @@ def _h12_calibration(
def h12_calibration(
self,
contig: base_params.contig,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
sample_sets: Optional[base_params.sample_sets] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
Expand Down Expand Up @@ -122,7 +121,7 @@ def h12_calibration(
def plot_h12_calibration(
self,
contig: base_params.contig,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
sample_sets: Optional[base_params.sample_sets] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
Expand Down Expand Up @@ -260,7 +259,7 @@ def h12_gwss(
self,
contig: base_params.contig,
window_size: h12_params.window_size,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
sample_sets: Optional[base_params.sample_sets] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
Expand Down Expand Up @@ -311,7 +310,7 @@ def plot_h12_gwss_track(
self,
contig: base_params.contig,
window_size: h12_params.window_size,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
Expand Down Expand Up @@ -408,7 +407,7 @@ def plot_h12_gwss(
self,
contig: base_params.contig,
window_size: h12_params.window_size,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
Expand Down
7 changes: 3 additions & 4 deletions malariagen_data/anoph/h1x.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from .hap_data import AnophelesHapData
from ..util import check_types, CacheMiss
from . import base_params
from .base_params import DEFAULT
from . import h12_params, gplt_params, hap_params
from .h12 import haplotype_frequencies

Expand Down Expand Up @@ -97,7 +96,7 @@ def h1x_gwss(
window_size: h12_params.window_size,
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
min_cohort_size: Optional[
Expand Down Expand Up @@ -153,7 +152,7 @@ def plot_h1x_gwss_track(
window_size: h12_params.window_size,
cohort1_query: base_params.cohort1_query,
cohort2_query: base_params.cohort2_query,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
min_cohort_size: Optional[
Expand Down Expand Up @@ -255,7 +254,7 @@ def plot_h1x_gwss(
window_size: h12_params.window_size,
cohort1_query: base_params.cohort1_query,
cohort2_query: base_params.cohort2_query,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
min_cohort_size: Optional[
Expand Down
11 changes: 5 additions & 6 deletions malariagen_data/anoph/hap_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
simple_xarray_concat,
)
from . import base_params, hap_params
from .base_params import DEFAULT
from .genome_features import AnophelesGenomeFeaturesData
from .genome_sequence import AnophelesGenomeSequenceData
from .sample_metadata import AnophelesSampleMetadata
Expand Down Expand Up @@ -57,7 +56,7 @@ def phasing_analysis_ids(self) -> Tuple[str, ...]:
return tuple(self.config.get("PHASING_ANALYSIS_IDS", ())) # ensure tuple

def _prep_phasing_analysis_param(self, *, analysis: hap_params.analysis) -> str:
if analysis == DEFAULT:
if analysis == base_params.DEFAULT:
# Use whatever is the default phasing analysis for this data resource.
assert self._default_phasing_analysis is not None
return self._default_phasing_analysis
Expand All @@ -74,7 +73,7 @@ def _prep_phasing_analysis_param(self, *, analysis: hap_params.analysis) -> str:
returns="Zarr hierarchy.",
)
def open_haplotype_sites(
self, analysis: hap_params.analysis = DEFAULT
self, analysis: hap_params.analysis = base_params.DEFAULT
) -> zarr.hierarchy.Group:
analysis = self._prep_phasing_analysis_param(analysis=analysis)
try:
Expand Down Expand Up @@ -172,7 +171,7 @@ def haplotype_sites(
self,
region: base_params.regions,
field: base_params.field,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
inline_array: base_params.inline_array = base_params.inline_array_default,
chunks: base_params.chunks = base_params.chunks_default,
) -> da.Array:
Expand Down Expand Up @@ -205,7 +204,7 @@ def haplotype_sites(
def open_haplotypes(
self,
sample_set: base_params.sample_set,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
) -> Optional[zarr.hierarchy.Group]:
analysis = self._prep_phasing_analysis_param(analysis=analysis)
try:
Expand Down Expand Up @@ -327,7 +326,7 @@ def _haplotypes_for_contig(
def haplotypes(
self,
region: base_params.regions,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
inline_array: base_params.inline_array = base_params.inline_array_default,
Expand Down
5 changes: 2 additions & 3 deletions malariagen_data/anoph/hapclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from ..util import CacheMiss, check_types, pdist_abs_hamming
from ..plotly_dendrogram import plot_dendrogram
from . import base_params, plotly_params, tree_params, hap_params, hapclust_params
from .base_params import DEFAULT
from .snp_data import AnophelesSnpData
from .hap_data import AnophelesHapData

Expand All @@ -35,7 +34,7 @@ def __init__(
def plot_haplotype_clustering(
self,
region: base_params.regions,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
cohort_size: Optional[base_params.cohort_size] = None,
Expand Down Expand Up @@ -186,7 +185,7 @@ def plot_haplotype_clustering(
def haplotype_pairwise_distances(
self,
region: base_params.regions,
analysis: hap_params.analysis = DEFAULT,
analysis: hap_params.analysis = base_params.DEFAULT,
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
cohort_size: Optional[base_params.cohort_size] = None,
Expand Down
2 changes: 1 addition & 1 deletion malariagen_data/anoph/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def pca(
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
sample_indices: Optional[base_params.sample_indices] = None,
site_mask: Optional[base_params.site_mask] = None,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
site_class: Optional[base_params.site_class] = None,
min_minor_ac: Optional[base_params.min_minor_ac] = None,
max_missing_an: Optional[base_params.max_missing_an] = None,
Expand Down
Loading

0 comments on commit f14f745

Please sign in to comment.