Skip to content

Commit

Permalink
Not sure what is going on
Browse files Browse the repository at this point in the history
  • Loading branch information
jonbrenas committed Aug 2, 2024
2 parents df13732 + 8c9389d commit ab45528
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 23 deletions.
6 changes: 3 additions & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ For accessing data in Google Cloud Storage (GCS) you will also need to authentic
If you are using ``malariagen_data`` from within Google Colab, authentication will be automatically
initiated; please allow access when requested.

If you are using ``malariagen_data`` from any location other than Google Colab, you will need to [set up application
default credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc). Generally
the best way to do this will be to [install the Google Cloud CLI](https://cloud.google.com/sdk/docs/install)
If you are using ``malariagen_data`` from any location other than Google Colab, you will need to `set up application
default credentials <https://cloud.google.com/docs/authentication/provide-credentials-adc>`_. Generally
the best way to do this will be to `install the Google Cloud CLI <https://cloud.google.com/sdk/docs/install>`_
and then run the following command::

gcloud auth application-default login
Expand Down
8 changes: 4 additions & 4 deletions malariagen_data/anoph/dipclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ def plot_diplotype_clustering_advanced(
)

fig_dendro = res["figure"]
n_snps = res["n_snps"]
n_snps_cluster = res["n_snps"]
dendro_sample_id_order = res["dendro_sample_id_order"]

figures = [fig_dendro]
Expand Down Expand Up @@ -631,7 +631,7 @@ def plot_diplotype_clustering_advanced(
subplot_heights.append(cnv_row_height * n_cnv_genes)

if snp_transcript:
snp_trace, n_snps = self._dipclust_snp_trace(
snp_trace, n_snps_transcript = self._dipclust_snp_trace(
transcript=snp_transcript,
sample_sets=sample_sets,
sample_query=sample_query,
Expand All @@ -644,7 +644,7 @@ def plot_diplotype_clustering_advanced(

if snp_trace:
figures.append(snp_trace)
subplot_heights.append(snp_row_height * n_snps)
subplot_heights.append(snp_row_height * n_snps_transcript)
else:
print(
f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot."
Expand All @@ -661,7 +661,7 @@ def plot_diplotype_clustering_advanced(
sample_sets=sample_sets,
sample_query=sample_query,
region=region,
n_snps=n_snps,
n_snps=n_snps_cluster,
)

fig["layout"]["yaxis"]["title"] = f"Distance ({distance_metric})"
Expand Down
10 changes: 7 additions & 3 deletions malariagen_data/anoph/fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def pairwise_average_fst(
def plot_pairwise_average_fst(
self,
fst_df: fst_params.df_pairwise_fst,
annotate_se: bool = False,
annotation: fst_params.annotation = None,
zmin: Optional[plotly_params.zmin] = 0.0,
zmax: Optional[plotly_params.zmax] = None,
text_auto: plotly_params.text_auto = ".3f",
Expand All @@ -500,9 +500,13 @@ def plot_pairwise_average_fst(
index = fst_df.iloc[index_key]["cohort1"]
col = fst_df.iloc[index_key]["cohort2"]
fst = fst_df.iloc[index_key]["fst"]
if annotate_se is True:
fig_df[index][col] = fst
if annotation == "standard error":
se = fst_df.iloc[index_key]["se"]
fig_df.loc[index, col] = se
fig_df[col][index] = se
elif annotation == "Z score":
zs = fst_df.iloc[index_key]["fst"] / fst_df.iloc[index_key]["se"]
fig_df[col][index] = zs
else:
fig_df.loc[index, col] = fst

Expand Down
11 changes: 10 additions & 1 deletion malariagen_data/anoph/fst_params.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Parameter definitions for Fst functions."""

from typing import Optional
from typing import Optional, Literal

import pandas as pd
from typing_extensions import Annotated, TypeAlias
Expand All @@ -22,3 +22,12 @@
A dataframe of pairwise Fst and standard error values.
""",
]

# Type alias for the ``annotation`` parameter of ``plot_pairwise_average_fst``.
# Accepted values are None (the default used by that function), "standard error"
# or "Z score". In the plotting code the Z score is computed as fst / se.
# The string metadata below is the human-readable description of the parameter.
annotation: TypeAlias = Annotated[
Optional[Literal["standard error", "Z score"]],
"""
How to annotate the upper-right corner of the plot. Default behaviour (None) is using Fst, other options
are using the standard error (if annotation is 'standard error') or the Z score of the two
cohorts being the same (if annotation is 'Z score').
""",
]
10 changes: 7 additions & 3 deletions malariagen_data/anoph/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,12 @@ def pca(
sample_indices: Optional[base_params.sample_indices] = None,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
site_class: Optional[base_params.site_class] = None,
min_minor_ac: Optional[base_params.min_minor_ac] = None,
max_missing_an: Optional[base_params.max_missing_an] = None,
min_minor_ac: Optional[
base_params.min_minor_ac
] = pca_params.min_minor_ac_default,
max_missing_an: Optional[
base_params.max_missing_an
] = pca_params.max_missing_an_default,
cohort_size: Optional[base_params.cohort_size] = None,
min_cohort_size: Optional[base_params.min_cohort_size] = None,
max_cohort_size: Optional[base_params.max_cohort_size] = None,
Expand All @@ -73,7 +77,7 @@ def pca(
) -> Tuple[pca_params.df_pca, pca_params.evr]:
# Change this name if you ever change the behaviour of this function, to
# invalidate any previously cached data.
name = "pca_v2"
name = "pca_v3"

# Normalize params for consistent hash value.
(
Expand Down
5 changes: 5 additions & 0 deletions malariagen_data/anoph/pca_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
from typing_extensions import Annotated, TypeAlias
from . import base_params

n_components: TypeAlias = Annotated[
int,
Expand All @@ -23,3 +24,7 @@
np.ndarray,
"An array of explained variance ratios, one per component.",
]

# Default value for the ``min_minor_ac`` parameter of ``pca`` and ``plot_njt``.
min_minor_ac_default: base_params.min_minor_ac = 2

# Default value for the ``max_missing_an`` parameter of ``pca`` and ``plot_njt``.
# NOTE: 0 is falsy, so code consuming this value must compare against None
# (``is not None``) rather than rely on truthiness.
max_missing_an_default: base_params.max_missing_an = 0
2 changes: 1 addition & 1 deletion malariagen_data/anoph/snp_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1655,7 +1655,7 @@ def biallelic_snp_calls(
ds_out = xr.Dataset(coords=coords, data_vars=data_vars, attrs=ds.attrs)

# Apply conditions.
if max_missing_an or min_minor_ac:
if max_missing_an is not None or min_minor_ac is not None:
loc_out = np.ones(ds_out.sizes["variants"], dtype=bool)

# Apply missingness condition.
Expand Down
9 changes: 7 additions & 2 deletions malariagen_data/anopheles.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
hapnet_params,
het_params,
ihs_params,
pca_params,
plotly_params,
xpehh_params,
)
Expand Down Expand Up @@ -3158,8 +3159,12 @@ def plot_njt(
sample_indices: Optional[base_params.sample_indices] = None,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
site_class: Optional[base_params.site_class] = None,
min_minor_ac: Optional[base_params.min_minor_ac] = None,
max_missing_an: Optional[base_params.max_missing_an] = None,
min_minor_ac: Optional[
base_params.min_minor_ac
] = pca_params.min_minor_ac_default,
max_missing_an: Optional[
base_params.max_missing_an
] = pca_params.max_missing_an_default,
cohort_size: Optional[base_params.cohort_size] = None,
min_cohort_size: Optional[base_params.min_cohort_size] = None,
max_cohort_size: Optional[base_params.max_cohort_size] = None,
Expand Down
4 changes: 2 additions & 2 deletions notebooks/plot_pairwise_average_fst.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
"metadata": {},
"outputs": [],
"source": [
"ag3.plot_pairwise_average_fst(pairwise_fst_df, annotate_se=True)"
"ag3.plot_pairwise_average_fst(pairwise_fst_df, annotation=\"standard error\")"
]
},
{
Expand All @@ -104,7 +104,7 @@
"metadata": {},
"outputs": [],
"source": [
"ag3.plot_pairwise_average_fst(pairwise_fst_df, annotate_se=False)"
"ag3.plot_pairwise_average_fst(pairwise_fst_df, annotation=\"Z score\")"
]
},
{
Expand Down
6 changes: 5 additions & 1 deletion tests/anoph/test_fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,11 @@ def check_pairwise_average_fst(api: AnophelesFstAnalysis, fst_params):
if len(fst_df) > 0:
fig = api.plot_pairwise_average_fst(fst_df, show=False)
assert isinstance(fig, go.Figure)
fig = api.plot_pairwise_average_fst(fst_df, annotate_se=True, show=False)
fig = api.plot_pairwise_average_fst(
fst_df, annotation="standard error", show=False
)
assert isinstance(fig, go.Figure)
fig = api.plot_pairwise_average_fst(fst_df, annotation="Z score", show=False)
assert isinstance(fig, go.Figure)


Expand Down
12 changes: 9 additions & 3 deletions tests/anoph/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from malariagen_data import af1 as _af1
from malariagen_data import ag3 as _ag3
from malariagen_data.anoph.pca import AnophelesPca
from malariagen_data.anoph import pca_params


@pytest.fixture
Expand Down Expand Up @@ -83,13 +84,18 @@ def test_pca_plotting(fixture, api: AnophelesPca):
sample_sets=random.sample(all_sample_sets, 2),
site_mask=random.choice((None,) + api.site_mask_ids),
)
ds = api.biallelic_snp_calls(**data_params)
ds = api.biallelic_snp_calls(
min_minor_ac=pca_params.min_minor_ac_default,
max_missing_an=pca_params.max_missing_an_default,
**data_params,
)

# PCA parameters.
n_samples = ds.sizes["samples"]
n_snps_available = ds.sizes["variants"]
n_snps = random.randint(n_samples, n_snps_available)
n_components = random.randint(3, n_samples)
n_snps = random.randint(1, n_snps_available)
# PC3 required for plot_pca_coords_3d()
n_components = random.randint(3, min(n_samples, n_snps))

# Run the PCA.
pca_df, pca_evr = api.pca(
Expand Down

0 comments on commit ab45528

Please sign in to comment.