Skip to content

Commit

Permalink
Merge branch 'master' into GH660_fix_cnv_discordant_read_calls
Browse files Browse the repository at this point in the history
  • Loading branch information
leehart committed Dec 3, 2024
2 parents d498230 + 6c27d20 commit bc6de13
Show file tree
Hide file tree
Showing 28 changed files with 1,957 additions and 803 deletions.
1 change: 1 addition & 0 deletions docs/source/Af1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ SNP data access
is_accessible
biallelic_snp_calls
biallelic_diplotypes
biallelic_snps_to_plink

Haplotype data access
---------------------
Expand Down
1 change: 1 addition & 0 deletions docs/source/Ag3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ SNP data access
is_accessible
biallelic_snp_calls
biallelic_diplotypes
biallelic_snps_to_plink

Haplotype data access
---------------------
Expand Down
1 change: 1 addition & 0 deletions malariagen_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .amin1 import Amin1
from .anopheles import AnophelesDataResource, Region
from .pf7 import Pf7
from .pf8 import Pf8
from .pv4 import Pv4
from .util import SiteClass

Expand Down
8 changes: 8 additions & 0 deletions malariagen_data/anoph/cnv_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ def plot_cnv_hmm_coverage(
line_kwargs: Optional[gplt_params.line_kwargs] = None,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -782,6 +784,8 @@ def plot_cnv_hmm_coverage(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

debug("combine plots into a single figure")
Expand Down Expand Up @@ -960,6 +964,8 @@ def plot_cnv_hmm_heatmap(
track_height: Optional[gplt_params.track_height] = None,
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -989,6 +995,8 @@ def plot_cnv_hmm_heatmap(
height=genes_height,
x_range=fig1.x_range,
show=False,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

debug("combine plots into a single figure")
Expand Down
17 changes: 10 additions & 7 deletions malariagen_data/anoph/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numba # type: ignore
import numpy as np
from numpydoc_decorator import doc # type: ignore
import anjl.params # type: ignore

# Internal imports.
from .snp_data import AnophelesSnpData
Expand Down Expand Up @@ -410,10 +409,10 @@ def plot_njt(
metric: distance_params.distance_metric = distance_params.default_distance_metric,
distance_sort: Optional[tree_params.distance_sort] = None,
count_sort: Optional[tree_params.count_sort] = None,
center_x: anjl.params.center_x = 0,
center_y: anjl.params.center_y = 0,
arc_start: anjl.params.arc_start = 0,
arc_stop: anjl.params.arc_stop = 2 * math.pi,
center_x: distance_params.center_x = 0,
center_y: distance_params.center_y = 0,
arc_start: distance_params.arc_start = 0,
arc_stop: distance_params.arc_stop = 2 * math.pi,
width: plotly_params.fig_width = 800,
height: plotly_params.fig_height = 600,
show: plotly_params.show = True,
Expand All @@ -426,8 +425,8 @@ def plot_njt(
color_discrete_sequence: plotly_params.color_discrete_sequence = None,
color_discrete_map: plotly_params.color_discrete_map = None,
category_orders: plotly_params.category_order = None,
edge_legend: anjl.params.edge_legend = False,
leaf_legend: anjl.params.leaf_legend = True,
edge_legend: distance_params.edge_legend = False,
leaf_legend: distance_params.leaf_legend = True,
legend_sizing: plotly_params.legend_sizing = "constant",
thin_offset: base_params.thin_offset = 0,
sample_sets: Optional[base_params.sample_sets] = None,
Expand All @@ -449,6 +448,10 @@ def plot_njt(
inline_array: base_params.inline_array = base_params.inline_array_default,
chunks: base_params.chunks = base_params.native_chunks,
) -> plotly_params.figure:
# Only import anjl if needed, as it requires a couple of seconds to compile
# functions.
import anjl # type: ignore

# Normalise params.
if count_sort is None and distance_sort is None:
count_sort = True
Expand Down
17 changes: 17 additions & 0 deletions malariagen_data/anoph/distance_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,20 @@
]

default_nj_algorithm: nj_algorithm = "dynamic"

# Layout and legend parameter types for the tree plot produced by plot_njt()
# in distance.py, where they annotate the arguments of the same names.
# NOTE(review): these appear to stand in for the equivalent anjl.params
# aliases so that distance.py can defer importing anjl until plot_njt() is
# actually called — confirm.

# Coordinates of the point the plot is centered on; plot_njt() defaults both
# center_x and center_y to 0.
center_x: TypeAlias = Annotated[int | float, "X coordinate where plotting is centered."]

center_y: TypeAlias = Annotated[int | float, "Y coordinate where plotting is centered."]

# Angular extent of the tree layout. plot_njt() defaults these to 0 and
# 2 * math.pi, so the unit is presumably radians — TODO confirm.
arc_start: TypeAlias = Annotated[int | float, "Angle where tree layout begins."]

arc_stop: TypeAlias = Annotated[int | float, "Angle where tree layout ends."]

# Legend toggles; plot_njt() defaults edge_legend to False and leaf_legend to
# True.
edge_legend: TypeAlias = Annotated[
bool, "Show legend entries for the different edge (line) colors."
]

leaf_legend: TypeAlias = Annotated[
bool,
"Show legend entries for the different leaf node (scatter) colors and symbols.",
]
4 changes: 4 additions & 0 deletions malariagen_data/anoph/fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ def plot_fst_gwss(
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
clip_min: fst_params.clip_min = 0.0,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# gwss track
fig1 = self.plot_fst_gwss_track(
Expand Down Expand Up @@ -327,6 +329,8 @@ def plot_fst_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# combine plots into a single figure
Expand Down
4 changes: 4 additions & 0 deletions malariagen_data/anoph/g123.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ def plot_g123_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
inline_array: base_params.inline_array = base_params.inline_array_default,
chunks: base_params.chunks = base_params.native_chunks,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# gwss track
fig1 = self.plot_g123_gwss_track(
Expand Down Expand Up @@ -472,6 +474,8 @@ def plot_g123_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# combine plots into a single figure
Expand Down
97 changes: 97 additions & 0 deletions malariagen_data/anoph/genome_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ def plot_genes(
x_range: Optional[gplt_params.x_range] = None,
title: Optional[gplt_params.title] = None,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -408,6 +410,101 @@ def plot_genes(
line_width=0,
)

if gene_labels:
debug("determine new figure height and range to accommodate gene labels")

# Increase the figure height by a certain factor, to accommodate labels.
height_increase_factor = 1.3
fig.height = int(fig.height * height_increase_factor)

# Get the original y_range.
# Note: fig.y_range is not subscriptable.
orig_y_range = fig.y_range.start, fig.y_range.end

# Determine the midpoint of the original range, to rescale outward from there.
orig_mid_y_range = (orig_y_range[0] + orig_y_range[1]) / 2
orig_y_range_extent = orig_y_range[1] - orig_y_range[0]

# Determine the new start and end points of the extended range.
new_y_range_extent = orig_y_range_extent * height_increase_factor
new_y_range_extent_half = new_y_range_extent / 2
new_y_start = orig_mid_y_range - new_y_range_extent_half
new_y_end = orig_mid_y_range + new_y_range_extent_half

# Set the new y_range.
fig.y_range = bokeh.models.Range1d(new_y_start, new_y_end)

debug("determine midpoint of each gene rectangle")
data["mid_x"] = (data["start"] + data["end"]) / 2

debug("make gene labels and pointers")

# Put gene_labels into a new column, where the gene_id matches.
# Fill unmapped genes with empty strings, otherwise "NaN" would be displayed.
data["gene_label"] = data["ID"].map(gene_labels).fillna("")

# Put gene pointers (▲ or ▼) in a new column, depending on the strand.
# Except if the gene_label is null or an empty string, which should not be shown.
data["gene_pointer"] = data.apply(
lambda row: ("▼" if row["strand"] == "+" else "▲")
if row["gene_label"]
else "",
axis=1,
)

# Put the pointer above or below the gene rectangle, depending on + or - strand.
neg_strand_pointer_y = orig_mid_y_range - 1.1
pos_strand_pointer_y = orig_mid_y_range + 1.1
data["pointer_y"] = data["strand"].apply(
lambda strand: pos_strand_pointer_y
if strand == "+"
else neg_strand_pointer_y
)

# Put the label above or below the gene rectangle, depending on + or - strand.
neg_strand_label_y = orig_mid_y_range - 1.25
pos_strand_label_y = orig_mid_y_range + 1.3
data["label_y"] = data["strand"].apply(
lambda strand: pos_strand_label_y
if strand == "+"
else neg_strand_label_y
)

# Get the data as a ColumnDataSource.
data_as_cds = bokeh.models.ColumnDataSource(data)

# Create a LabelSet for the gene pointers.
gene_pointers_ls = bokeh.models.LabelSet(
source=data_as_cds,
x="mid_x",
y="pointer_y",
text="gene_pointer",
text_align="center",
text_baseline="middle",
text_font_size="9pt",
text_color="#444444",
)

# Create a LabelSet for the gene labels.
gene_labels_ls = bokeh.models.LabelSet(
source=data_as_cds,
x="mid_x",
y="label_y",
text="gene_label",
text_align="left",
text_baseline="middle",
text_font_size="9pt",
text_color="#444444",
x_offset=8,
)

# Add the markers and labels to the figure.
fig.add_layout(gene_pointers_ls)
fig.add_layout(gene_labels_ls)

if gene_labelset:
fig.add_layout(gene_labelset)

debug("tidy up the plot")
fig.ygrid.visible = False
yticks = [0.4, 1.4]
Expand Down
10 changes: 10 additions & 0 deletions malariagen_data/anoph/gplt_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,13 @@
contig_colors_default: Final[contig_colors] = list(bokeh.palettes.d3["Category20b"][5])

colors: TypeAlias = Annotated[Sequence[str], "List of colors."]

# Optional custom labels for genes drawn by plot_genes(). The mapping is
# looked up against the "ID" column of the gene annotations; genes without an
# entry get an empty label and no strand pointer.
gene_labels: TypeAlias = Annotated[
Mapping[str, str],
"A mapping of gene identifiers to custom labels, which will appear in the plot.",
]

# A caller-constructed bokeh LabelSet that plot_genes() attaches to the genes
# figure unchanged via fig.add_layout().
gene_labelset: TypeAlias = Annotated[
bokeh.models.LabelSet,
"A LabelSet to use in the plot.",
]
12 changes: 12 additions & 0 deletions malariagen_data/anoph/h12.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ def plot_h12_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot GWSS track.
fig1 = self.plot_h12_gwss_track(
Expand Down Expand Up @@ -508,6 +510,8 @@ def plot_h12_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Combine plots into a single figure.
Expand Down Expand Up @@ -674,6 +678,8 @@ def plot_h12_gwss_multi_overlay(
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot GWSS track.
fig1 = self.plot_h12_gwss_multi_overlay_track(
Expand Down Expand Up @@ -710,6 +716,8 @@ def plot_h12_gwss_multi_overlay(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Combine plots into a single figure.
Expand Down Expand Up @@ -755,6 +763,8 @@ def plot_h12_gwss_multi_panel(
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
cohort_queries = self._setup_cohort_queries(
cohorts=cohorts,
Expand Down Expand Up @@ -807,6 +817,8 @@ def plot_h12_gwss_multi_panel(
x_range=figs[0].x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

figs.append(fig2)
Expand Down
4 changes: 4 additions & 0 deletions malariagen_data/anoph/h1x.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ def plot_h1x_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot GWSS track.
fig1 = self.plot_h1x_gwss_track(
Expand Down Expand Up @@ -341,6 +343,8 @@ def plot_h1x_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Combine plots into a single figure.
Expand Down
18 changes: 18 additions & 0 deletions malariagen_data/anoph/plink_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Parameters for Plink converter functions."""

from typing_extensions import Annotated, TypeAlias

# Flag controlling whether an existing output file with the same name is
# replaced. The description text documents False as the default; the default
# value itself is not set here, only the type and its description.
overwrite: TypeAlias = Annotated[
bool,
"""
A boolean indicating whether a previously written file with the same name ought
to be overwritten. Default is False.
""",
]

# Destination for the converter output.
# NOTE(review): the alias name suggests a directory, while the description
# says "file location" — confirm which is intended and align the wording.
output_dir: TypeAlias = Annotated[
str,
"""
A string indicating the desired output file location.
""",
]
4 changes: 4 additions & 0 deletions malariagen_data/anoph/snp_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,8 @@ def plot_snps(
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
max_snps: int = 200_000,
show: gplt_params.show = True,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot SNPs track.
fig1 = self.plot_snps_track(
Expand All @@ -1330,6 +1332,8 @@ def plot_snps(
height=genes_height,
x_range=fig1.x_range,
show=False,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Layout tracks in a grid.
Expand Down
Loading

0 comments on commit bc6de13

Please sign in to comment.