Docstrings for Anopheles extra metadata methods (#330)

* add docstring for extra metadata methods
* clean up some comments
malariagen · Feb 2, 2023 · 6e951d1 · 6e951d1
1 parent 1b4401f
commit 6e951d1
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 13 deletions.
diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py
@@ -3791,7 +3791,6 @@ def plot_haplotype_clustering(
             ht.T,
             distfun=lambda x: _hamming_to_snps(x),
             linkagefun=lambda x: linkage(x, method=linkage_method),
-            # FIXME: expected type 'list', got 'ndarray'
             labels=leaf_labels,
             color_threshold=0,
             count_sort=count_sort,
@@ -3941,7 +3940,6 @@ def plot_haplotype_network(
 
         from itertools import cycle
 
-        # FIXME: unresolved references
         import dash_cytoscape as cyto
         import plotly.express as px
         from dash import dcc, html

diff --git a/malariagen_data/anopheles.py b/malariagen_data/anopheles.py
@@ -60,8 +60,7 @@
 
 
 class AnophelesDataResource(ABC):
-
-    # TODO: parent class docstring
+    """Base class for Anopheles data resources."""
 
     def __init__(
         self,
@@ -1295,6 +1294,23 @@ def sample_metadata(
         return df_samples.copy()
 
     def add_extra_metadata(self, data, on="sample_id"):
+        """Add extra sample metadata, e.g., including additional columns
+        which you would like to use to query and group samples.
+
+        Parameters
+        ----------
+        data : DataFrame
+            A data frame with one row per sample. Must include either a
+            "sample_id" or "partner_sample_id" column.
+        on : {"sample_id", "partner_sample_id"}
+            Name of column to use when merging with sample metadata.
+
+        Notes
+        -----
+        The values in the column containing sample identifiers must be
+        unique.
+
+        """
 
         # check parameters
         if not isinstance(data, pd.DataFrame):
@@ -1320,6 +1336,7 @@ def add_extra_metadata(self, data, on="sample_id"):
         self._extra_metadata.append((on, data.copy()))
 
     def clear_extra_metadata(self):
+        """Clear any extra metadata previously added."""
         self._extra_metadata = []
 
     def _site_filters(
@@ -2640,7 +2657,6 @@ def _pca(
         )
 
         debug("perform allele count")
-        # FIXME: Parameter 'cohort_size', 'random_seed', 'site_class' unfilled
         ac = self.snp_allele_counts(
             region=region,
             sample_sets=sample_sets,
@@ -4081,7 +4097,6 @@ def plot_snps_track(
             raise ValueError("Region is too large, please provide a smaller region.")
 
         debug("compute allele counts")
-        # FIXME: Parameters 'random_seed', 'site_class' unfilled
         ac = allel.AlleleCountsArray(
             self.snp_allele_counts(
                 region=region,
@@ -4201,7 +4216,6 @@ def plot_snps_track(
             source=data,
             name="snps",
         )
-        # TODO add legend?
 
         debug("tidy plot")
         fig.yaxis.ticker = bkmod.FixedTicker(
@@ -4676,7 +4690,6 @@ def aa_allele_frequencies_advanced(
         ds_aa_frq = group_by_aa_change.map(self._map_snp_to_aa_change_frq_ds)
 
         debug("add back in cohort variables, unaffected by aggregation")
-        # FIXME: Unresolved attribute reference 'startswith' for class 'Hashable'
         cohort_vars = [v for v in ds_snp_frq if v.startswith("cohort_")]
         for v in cohort_vars:
             ds_aa_frq[v] = ds_snp_frq[v]
@@ -4764,7 +4777,6 @@ def _block_jackknife_cohort_diversity_stats(
         seg_data = ac.allelism() - 1
 
         debug("compute estimates from all data")
-        # FIXME: variable in function should be lowercase
         theta_pi_abs_data = np.sum(mpd_data)
         theta_pi_data = theta_pi_abs_data / n_sites
         S_data = np.sum(seg_data)
@@ -4798,7 +4810,6 @@ def _block_jackknife_cohort_diversity_stats(
             jack_theta_pi.append(theta_pi_j)
 
             # theta_w
-            # FIXME: variable in function should be lowercase
             seg_j = seg_data[loc_j]
             S_j = np.sum(seg_j)
             theta_w_abs_j = S_j / a1
@@ -5013,7 +5024,6 @@ def cohort_diversity_stats(
 
         return pd.Series(stats)
 
-    # TODO: compare with cohort_diversity_stats()
     def diversity_stats(
         self,
         cohorts,
@@ -5520,7 +5530,6 @@ def plot_frequencies_time_series(
             title = ds.attrs.get("title", None)
 
         debug("extract cohorts into a dataframe")
-        # FIXME: unresolved attribute reference 'startswith'
         cohort_vars = [v for v in ds if v.startswith("cohort_")]
         df_cohorts = ds[cohort_vars].to_dataframe()
         df_cohorts.columns = [c.split("cohort_")[1] for c in df_cohorts.columns]
@@ -6194,7 +6203,6 @@ def plot_samples_interactive_map(
 
         debug("create a map")
         if basemap is None:
-            # FIXME: cannot find reference 'Esri'
             basemap = ipyleaflet.basemaps.Esri.WorldImagery
         samples_map = ipyleaflet.Map(
             center=center,