Skip to content

Commit

Permalink
Docstrings for Anopheles extra metadata methods (#330)
Browse files Browse the repository at this point in the history
* add docstring for extra metadata methods

* clean up some comments
  • Loading branch information
alimanfoo authored Feb 2, 2023
1 parent 1b4401f commit 6e951d1
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
2 changes: 0 additions & 2 deletions malariagen_data/ag3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3791,7 +3791,6 @@ def plot_haplotype_clustering(
ht.T,
distfun=lambda x: _hamming_to_snps(x),
linkagefun=lambda x: linkage(x, method=linkage_method),
# FIXME: expected type 'list', got 'ndarray'
labels=leaf_labels,
color_threshold=0,
count_sort=count_sort,
Expand Down Expand Up @@ -3941,7 +3940,6 @@ def plot_haplotype_network(

from itertools import cycle

# FIXME: unresolved references
import dash_cytoscape as cyto
import plotly.express as px
from dash import dcc, html
Expand Down
30 changes: 19 additions & 11 deletions malariagen_data/anopheles.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@


class AnophelesDataResource(ABC):

# TODO: parent class docstring
"""Base class for Anopheles data resources."""

def __init__(
self,
Expand Down Expand Up @@ -1295,6 +1294,23 @@ def sample_metadata(
return df_samples.copy()

def add_extra_metadata(self, data, on="sample_id"):
"""Add extra sample metadata, e.g., including additional columns
which you would like to use to query and group samples.

Parameters
----------
data : DataFrame
    A data frame with one row per sample. Must include either a
    "sample_id" or "partner_sample_id" column.
on : {"sample_id", "partner_sample_id"}
    Name of column to use when merging with sample metadata.

Notes
-----
The values in the column containing sample identifiers must be
unique.
"""

# check parameters
if not isinstance(data, pd.DataFrame):
Expand All @@ -1320,6 +1336,7 @@ def add_extra_metadata(self, data, on="sample_id"):
self._extra_metadata.append((on, data.copy()))

def clear_extra_metadata(self):
"""Clear any extra metadata previously added."""
self._extra_metadata = []

def _site_filters(
Expand Down Expand Up @@ -2640,7 +2657,6 @@ def _pca(
)

debug("perform allele count")
# FIXME: Parameter 'cohort_size', 'random_seed', 'site_class' unfilled
ac = self.snp_allele_counts(
region=region,
sample_sets=sample_sets,
Expand Down Expand Up @@ -4081,7 +4097,6 @@ def plot_snps_track(
raise ValueError("Region is too large, please provide a smaller region.")

debug("compute allele counts")
# FIXME: Parameters 'random_seed', 'site_class' unfilled
ac = allel.AlleleCountsArray(
self.snp_allele_counts(
region=region,
Expand Down Expand Up @@ -4201,7 +4216,6 @@ def plot_snps_track(
source=data,
name="snps",
)
# TODO add legend?

debug("tidy plot")
fig.yaxis.ticker = bkmod.FixedTicker(
Expand Down Expand Up @@ -4676,7 +4690,6 @@ def aa_allele_frequencies_advanced(
ds_aa_frq = group_by_aa_change.map(self._map_snp_to_aa_change_frq_ds)

debug("add back in cohort variables, unaffected by aggregation")
# FIXME: Unresolved attribute reference 'startswith' for class 'Hashable'
cohort_vars = [v for v in ds_snp_frq if v.startswith("cohort_")]
for v in cohort_vars:
ds_aa_frq[v] = ds_snp_frq[v]
Expand Down Expand Up @@ -4764,7 +4777,6 @@ def _block_jackknife_cohort_diversity_stats(
seg_data = ac.allelism() - 1

debug("compute estimates from all data")
# FIXME: variable in function should be lowercase
theta_pi_abs_data = np.sum(mpd_data)
theta_pi_data = theta_pi_abs_data / n_sites
S_data = np.sum(seg_data)
Expand Down Expand Up @@ -4798,7 +4810,6 @@ def _block_jackknife_cohort_diversity_stats(
jack_theta_pi.append(theta_pi_j)

# theta_w
# FIXME: variable in function should be lowercase
seg_j = seg_data[loc_j]
S_j = np.sum(seg_j)
theta_w_abs_j = S_j / a1
Expand Down Expand Up @@ -5013,7 +5024,6 @@ def cohort_diversity_stats(

return pd.Series(stats)

# TODO: compare with cohort_diversity_stats()
def diversity_stats(
self,
cohorts,
Expand Down Expand Up @@ -5520,7 +5530,6 @@ def plot_frequencies_time_series(
title = ds.attrs.get("title", None)

debug("extract cohorts into a dataframe")
# FIXME: unresolved attribute reference 'startswith'
cohort_vars = [v for v in ds if v.startswith("cohort_")]
df_cohorts = ds[cohort_vars].to_dataframe()
df_cohorts.columns = [c.split("cohort_")[1] for c in df_cohorts.columns]
Expand Down Expand Up @@ -6194,7 +6203,6 @@ def plot_samples_interactive_map(

debug("create a map")
if basemap is None:
# FIXME: cannot find reference 'Esri'
basemap = ipyleaflet.basemaps.Esri.WorldImagery
samples_map = ipyleaflet.Map(
center=center,
Expand Down

0 comments on commit 6e951d1

Please sign in to comment.