Skip to content

Commit

Permalink
Advanced diplotype clustering - follow-up (#550)
Browse files Browse the repository at this point in the history
* add diplotype clustering API docs

* tidy; dynamic height

* tidy legend

* tweaks

* fix typing

* refactor

* fix tests
  • Loading branch information
alimanfoo authored Jun 14, 2024
1 parent f8419eb commit 6d638c1
Show file tree
Hide file tree
Showing 14 changed files with 112 additions and 94 deletions.
8 changes: 8 additions & 0 deletions docs/source/Af1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ Haplotype clustering and network analysis
plot_haplotype_network
haplotype_pairwise_distances

Diplotype clustering
--------------------
.. autosummary::
:toctree: generated/

plot_diplotype_clustering
plot_diplotype_clustering_advanced

Fst analysis
------------
.. autosummary::
Expand Down
8 changes: 8 additions & 0 deletions docs/source/Ag3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ Haplotype clustering and network analysis
plot_haplotype_network
haplotype_pairwise_distances

Diplotype clustering
--------------------
.. autosummary::
:toctree: generated/

plot_diplotype_clustering
plot_diplotype_clustering_advanced

Fst analysis
------------
.. autosummary::
Expand Down
18 changes: 18 additions & 0 deletions malariagen_data/anoph/clustering_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Parameters for hierarchical clustering functions."""

from typing import Literal

from typing_extensions import Annotated, TypeAlias

linkage_method: TypeAlias = Annotated[
Literal["single", "complete", "average", "weighted", "centroid", "median", "ward"],
"""
The linkage algorithm to use. See the Linkage Methods section of the
scipy.cluster.hierarchy.linkage docs for full descriptions.
""",
]

leaf_y: TypeAlias = Annotated[
int,
"Y coordinate at which to plot the leaf markers.",
]
58 changes: 33 additions & 25 deletions malariagen_data/anoph/dipclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
base_params,
plotly_params,
tree_params,
clustering_params,
dipclust_params,
cnv_params,
)
Expand All @@ -42,10 +43,6 @@ def __init__(
@check_types
@doc(
summary="Hierarchically cluster diplotypes in region and produce an interactive plot.",
parameters=dict(
leaf_y="Y coordinate at which to plot the leaf markers.",
return_order_dict="Return a dictionary containing the order of samples in the dendrogram.",
),
)
def plot_diplotype_clustering(
self,
Expand All @@ -63,12 +60,12 @@ def plot_diplotype_clustering(
distance_sort: Optional[tree_params.distance_sort] = None,
title: plotly_params.title = True,
title_font_size: plotly_params.title_font_size = 14,
width: plotly_params.width = None,
height: plotly_params.height = 500,
width: plotly_params.fig_width = None,
height: plotly_params.fig_height = 500,
show: plotly_params.show = True,
renderer: plotly_params.renderer = None,
render_mode: plotly_params.render_mode = "svg",
leaf_y: int = 0,
leaf_y: clustering_params.leaf_y = 0,
marker_size: plotly_params.marker_size = 5,
line_width: plotly_params.line_width = 0.5,
line_color: plotly_params.line_color = "black",
Expand All @@ -79,8 +76,6 @@ def plot_diplotype_clustering(
) -> Optional[dict]:
import sys

debug = self._log.debug

# Normalise params.
if count_sort is None and distance_sort is None:
count_sort = True
Expand All @@ -90,7 +85,7 @@ def plot_diplotype_clustering(
# with larger numbers of nodes.
sys.setrecursionlimit(10_000)

debug("load sample metadata")
# Load sample metadata.
df_samples = self.sample_metadata(
sample_sets=sample_sets, sample_query=sample_query
)
Expand Down Expand Up @@ -448,7 +443,7 @@ def _dipclust_snp_trace(
df_snps = df_snps.query("af > @snp_filter_min_maf").drop(columns="af")

if not df_snps.empty:
snp_height = np.max([df_snps.shape[0] / 100, 0.2]) # minimum height of 0.2
n_snps = len(df_snps)
snp_trace = go.Heatmap(
z=df_snps.values,
y=df_snps.index.to_list(),
Expand All @@ -457,10 +452,10 @@ def _dipclust_snp_trace(
showscale=False,
)
else:
n_snps = 0
snp_trace = None
snp_height = 0

return snp_trace, snp_height
return snp_trace, n_snps

def _dipclust_concat_subplots(
self,
Expand Down Expand Up @@ -518,7 +513,6 @@ def _dipclust_concat_subplots(
heterozygosity="Plot heterozygosity track.",
snp_transcript="Plot amino acid variants for this transcript.",
cnv_region="Plot gene CNV calls for this region.",
leaf_y="Y coordinate at which to plot the leaf markers.",
snp_filter_min_maf="Filter amino acid variants with alternate allele frequency below this threshold.",
),
)
Expand Down Expand Up @@ -547,12 +541,15 @@ def plot_diplotype_clustering_advanced(
distance_sort: Optional[tree_params.distance_sort] = None,
title: plotly_params.title = True,
title_font_size: plotly_params.title_font_size = 14,
width: plotly_params.width = None,
height: plotly_params.height = 500,
width: plotly_params.fig_width = None,
dendrogram_height: plotly_params.height = 300,
heterozygosity_height: plotly_params.height = 25,
snp_row_height: plotly_params.height = 25,
cnv_row_height: plotly_params.height = 25,
show: plotly_params.show = True,
renderer: plotly_params.renderer = None,
render_mode: plotly_params.render_mode = "svg",
leaf_y: int = 0,
leaf_y: clustering_params.leaf_y = 0,
marker_size: plotly_params.marker_size = 5,
line_width: plotly_params.line_width = 0.5,
line_color: plotly_params.line_color = "black",
Expand Down Expand Up @@ -582,7 +579,7 @@ def plot_diplotype_clustering_advanced(
title=title,
title_font_size=title_font_size,
width=width,
height=height,
height=dendrogram_height,
show=False,
renderer=renderer,
render_mode=render_mode,
Expand All @@ -602,7 +599,7 @@ def plot_diplotype_clustering_advanced(
dendro_sample_id_order = res["dendro_sample_id_order"]

figures = [fig_dendro]
row_heights = [0.2]
subplot_heights = [dendrogram_height]

if heterozygosity:
het_trace = self._dipclust_het_bar_trace(
Expand All @@ -616,10 +613,10 @@ def plot_diplotype_clustering_advanced(
random_seed=random_seed,
)
figures.append(het_trace)
row_heights.append(0.012)
subplot_heights.append(heterozygosity_height)

if cnv_region:
cnv_trace, cnv_genes = self._dipclust_cnv_bar_trace(
cnv_trace, n_cnv_genes = self._dipclust_cnv_bar_trace(
cnv_region=cnv_region,
dendro_sample_id_order=dendro_sample_id_order,
sample_sets=sample_sets,
Expand All @@ -631,10 +628,10 @@ def plot_diplotype_clustering_advanced(
# see if the trace is not None.
if cnv_trace is not None:
figures.append(cnv_trace)
row_heights.append(0.015 * cnv_genes)
subplot_heights.append(cnv_row_height * n_cnv_genes)

if snp_transcript:
snp_trace, snp_height = self._dipclust_snp_trace(
snp_trace, n_snps = self._dipclust_snp_trace(
transcript=snp_transcript,
sample_sets=sample_sets,
sample_query=sample_query,
Expand All @@ -647,17 +644,20 @@ def plot_diplotype_clustering_advanced(

if snp_trace:
figures.append(snp_trace)
row_heights.append(snp_height)
subplot_heights.append(snp_row_height * n_snps)
else:
print(
f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot."
)

# Calculate total height based on subplot heights, plus a fixed
# additional component to allow for title, axes etc.
height = sum(subplot_heights) + 50
fig = self._dipclust_concat_subplots(
figures=figures,
width=width,
height=height,
row_heights=row_heights,
row_heights=subplot_heights,
sample_sets=sample_sets,
sample_query=sample_query,
region=region,
Expand All @@ -666,6 +666,14 @@ def plot_diplotype_clustering_advanced(

fig["layout"]["yaxis"]["title"] = f"Distance ({distance_metric})"

# Tidy up.
fig.update_layout(
title_font=dict(
size=title_font_size,
),
legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
)

if show:
fig.show(renderer=renderer)
return None
Expand Down
11 changes: 1 addition & 10 deletions malariagen_data/anoph/dipclust_params.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,9 @@
"""Parameters for diplotype clustering functions."""

from typing import Literal
from typing_extensions import Annotated, TypeAlias
from .diplotype_distance_params import distance_metric
from .clustering_params import linkage_method


linkage_method: TypeAlias = Annotated[
Literal["single", "complete", "average", "weighted", "centroid", "median", "ward"],
"""
The linkage algorithm to use. See the Linkage Methods section of the
scipy.cluster.hierarchy.linkage docs for full descriptions.
""",
]

linkage_method_default: linkage_method = "complete"

distance_metric_default: distance_metric = "cityblock"
4 changes: 2 additions & 2 deletions malariagen_data/anoph/fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,8 @@ def plot_pairwise_average_fst(
zmax: Optional[plotly_params.zmax] = None,
text_auto: plotly_params.text_auto = ".3f",
color_continuous_scale: plotly_params.color_continuous_scale = "gray_r",
width: plotly_params.width = 700,
height: plotly_params.height = 600,
width: plotly_params.fig_width = 700,
height: plotly_params.fig_height = 600,
show: plotly_params.show = True,
renderer: plotly_params.renderer = None,
**kwargs,
Expand Down
20 changes: 12 additions & 8 deletions malariagen_data/anoph/hapclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,14 @@

from ..util import CacheMiss, check_types, pdist_abs_hamming
from ..plotly_dendrogram import plot_dendrogram
from . import base_params, plotly_params, tree_params, hap_params, hapclust_params
from . import (
base_params,
plotly_params,
tree_params,
hap_params,
clustering_params,
hapclust_params,
)
from .snp_data import AnophelesSnpData
from .hap_data import AnophelesHapData

Expand All @@ -27,9 +34,6 @@ def __init__(
summary="""
Hierarchically cluster haplotypes in region and produce an interactive plot.
""",
parameters=dict(
leaf_y="Y coordinate at which to plot the leaf markers.",
),
)
def plot_haplotype_clustering(
self,
Expand All @@ -46,12 +50,12 @@ def plot_haplotype_clustering(
distance_sort: Optional[tree_params.distance_sort] = None,
title: plotly_params.title = True,
title_font_size: plotly_params.title_font_size = 14,
width: plotly_params.width = None,
height: plotly_params.height = 500,
width: plotly_params.fig_width = None,
height: plotly_params.fig_height = 500,
show: plotly_params.show = True,
renderer: plotly_params.renderer = None,
render_mode: plotly_params.render_mode = "svg",
leaf_y: int = 0,
leaf_y: clustering_params.leaf_y = 0,
marker_size: plotly_params.marker_size = 5,
line_width: plotly_params.line_width = 0.5,
line_color: plotly_params.line_color = "black",
Expand Down Expand Up @@ -133,7 +137,7 @@ def plot_haplotype_clustering(

# Create the plot.
with self._spinner("Plot dendrogram"):
fig = plot_dendrogram(
fig, _ = plot_dendrogram(
dist=dist,
linkage_method=linkage_method,
count_sort=count_sort,
Expand Down
12 changes: 1 addition & 11 deletions malariagen_data/anoph/hapclust_params.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,5 @@
"""Parameters for haplotype clustering functions."""

from typing import Literal

from typing_extensions import Annotated, TypeAlias

linkage_method: TypeAlias = Annotated[
Literal["single", "complete", "average", "weighted", "centroid", "median", "ward"],
"""
The linkage algorithm to use. See the Linkage Methods section of the
scipy.cluster.hierarchy.linkage docs for full descriptions.
""",
]
from .clustering_params import linkage_method

linkage_method_default: linkage_method = "single"
12 changes: 6 additions & 6 deletions malariagen_data/anoph/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ def _pca(
def plot_pca_variance(
self,
evr: pca_params.evr,
width: plotly_params.width = 900,
height: plotly_params.height = 400,
width: plotly_params.fig_width = 900,
height: plotly_params.fig_height = 400,
show: plotly_params.show = True,
renderer: plotly_params.renderer = None,
**kwargs,
Expand Down Expand Up @@ -257,8 +257,8 @@ def plot_pca_coords(
opacity: float = 0.9,
jitter_frac: plotly_params.jitter_frac = 0.02,
random_seed: base_params.random_seed = 42,
width: plotly_params.width = 900,
height: plotly_params.height = 600,
width: plotly_params.fig_width = 900,
height: plotly_params.fig_height = 600,
marker_size: plotly_params.marker_size = 10,
color_discrete_sequence: plotly_params.color_discrete_sequence = None,
color_discrete_map: plotly_params.color_discrete_map = None,
Expand Down Expand Up @@ -361,8 +361,8 @@ def plot_pca_coords_3d(
symbol: plotly_params.symbol = None,
jitter_frac: plotly_params.jitter_frac = 0.02,
random_seed: base_params.random_seed = 42,
width: plotly_params.width = 900,
height: plotly_params.height = 600,
width: plotly_params.fig_width = 900,
height: plotly_params.fig_height = 600,
marker_size: plotly_params.marker_size = 5,
color_discrete_sequence: plotly_params.color_discrete_sequence = None,
color_discrete_map: plotly_params.color_discrete_map = None,
Expand Down
13 changes: 9 additions & 4 deletions malariagen_data/anoph/plotly_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,19 @@
"Y axis label.",
]

width: TypeAlias = Annotated[
fig_width: TypeAlias = Annotated[
Optional[int],
"Plot width in pixels (px).",
"Figure width in pixels (px).",
]

height: TypeAlias = Annotated[
fig_height: TypeAlias = Annotated[
Optional[int],
"Plot height in pixels (px).",
"Figure height in pixels (px).",
]

height: TypeAlias = Annotated[
int,
"Height in pixels (px).",
]

aspect: TypeAlias = Annotated[
Expand Down
4 changes: 2 additions & 2 deletions malariagen_data/anoph/sample_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,8 @@ def plot_samples_bar(
sample_sets: Optional[base_params.sample_sets] = None,
sample_query: Optional[base_params.sample_query] = None,
template: plotly_params.template = "plotly_white",
width: plotly_params.width = 800,
height: plotly_params.height = 600,
width: plotly_params.fig_width = 800,
height: plotly_params.fig_height = 600,
show: plotly_params.show = True,
renderer: plotly_params.renderer = None,
**kwargs,
Expand Down
Loading

0 comments on commit 6d638c1

Please sign in to comment.