diff --git a/malariagen_data/anoph/fst.py b/malariagen_data/anoph/fst.py index abf54d1b3..ee12f2147 100644 --- a/malariagen_data/anoph/fst.py +++ b/malariagen_data/anoph/fst.py @@ -40,6 +40,7 @@ def _fst_gwss( random_seed, inline_array, chunks, + clip_min, ): # Compute allele counts. ac1 = self.snp_allele_counts( @@ -79,7 +80,7 @@ def _fst_gwss( with self._spinner(desc="Compute Fst"): fst = allel.moving_hudson_fst(ac1, ac2, size=window_size) # Sometimes Fst can be very slightly below zero, clip for simplicity. - fst = np.clip(fst, a_min=0, a_max=1) + fst = np.clip(fst, a_min=clip_min, a_max=1) x = allel.moving_statistic(pos, statistic=np.mean, size=window_size) results = dict(x=x, fst=fst) @@ -115,6 +116,7 @@ def fst_gwss( random_seed: base_params.random_seed = 42, inline_array: base_params.inline_array = base_params.inline_array_default, chunks: base_params.chunks = base_params.chunks_default, + clip_min: fst_params.clip_min = 0.0, ) -> Tuple[np.ndarray, np.ndarray]: # Change this name if you ever change the behaviour of this function, to # invalidate any previously cached data. 
@@ -131,6 +133,7 @@ def fst_gwss( min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, random_seed=random_seed, + clip_min=clip_min, ) try: @@ -175,6 +178,7 @@ def plot_fst_gwss_track( show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, + clip_min: fst_params.clip_min = 0.0, ) -> gplt_params.figure: # compute Fst x, fst = self.fst_gwss( @@ -188,6 +192,7 @@ def plot_fst_gwss_track( sample_sets=sample_sets, site_mask=site_mask, random_seed=random_seed, + clip_min=clip_min, ) # determine X axis range @@ -277,6 +282,7 @@ def plot_fst_gwss( genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, + clip_min: fst_params.clip_min = 0.0, ) -> gplt_params.figure: # gwss track fig1 = self.plot_fst_gwss_track( @@ -296,6 +302,7 @@ def plot_fst_gwss( height=track_height, show=False, output_backend=output_backend, + clip_min=clip_min, ) fig1.xaxis.visible = False diff --git a/malariagen_data/anoph/fst_params.py b/malariagen_data/anoph/fst_params.py index 596a6bf16..5b835c297 100644 --- a/malariagen_data/anoph/fst_params.py +++ b/malariagen_data/anoph/fst_params.py @@ -16,6 +16,13 @@ min_cohort_size_default: base_params.min_cohort_size = 15 max_cohort_size_default: base_params.max_cohort_size = 50 +clip_min: TypeAlias = Annotated[ + Optional[float], + """ + Minimum value for Fst. Values below this are clipped to this value. If None, no lower clipping is applied.
+ """, +] + df_pairwise_fst: TypeAlias = Annotated[ pd.DataFrame, """ diff --git a/malariagen_data/anoph/genome_features.py b/malariagen_data/anoph/genome_features.py index fd1f40068..03a0b185c 100644 --- a/malariagen_data/anoph/genome_features.py +++ b/malariagen_data/anoph/genome_features.py @@ -362,6 +362,7 @@ def plot_genes( data["top"] = data["bottom"] + 0.8 debug("tidy up missing values for presentation") + data = data.drop(["score", "phase"], axis=1) data.fillna("", inplace=True) debug("make a figure")