Skip to content

Commit

Permalink
Merge pull request #653 from malariagen/GH407_allow_gene_labels
Browse files Browse the repository at this point in the history
Add gene_labels and gene_labelset to plot_genes()
  • Loading branch information
leehart authored Dec 3, 2024
2 parents 6cd964b + db100fc commit 6c27d20
Show file tree
Hide file tree
Showing 11 changed files with 243 additions and 2 deletions.
8 changes: 8 additions & 0 deletions malariagen_data/anoph/cnv_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ def plot_cnv_hmm_coverage(
line_kwargs: Optional[gplt_params.line_kwargs] = None,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -782,6 +784,8 @@ def plot_cnv_hmm_coverage(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

debug("combine plots into a single figure")
Expand Down Expand Up @@ -960,6 +964,8 @@ def plot_cnv_hmm_heatmap(
track_height: Optional[gplt_params.track_height] = None,
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -989,6 +995,8 @@ def plot_cnv_hmm_heatmap(
height=genes_height,
x_range=fig1.x_range,
show=False,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

debug("combine plots into a single figure")
Expand Down
4 changes: 4 additions & 0 deletions malariagen_data/anoph/fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ def plot_fst_gwss(
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
clip_min: fst_params.clip_min = 0.0,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# gwss track
fig1 = self.plot_fst_gwss_track(
Expand Down Expand Up @@ -327,6 +329,8 @@ def plot_fst_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# combine plots into a single figure
Expand Down
4 changes: 4 additions & 0 deletions malariagen_data/anoph/g123.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ def plot_g123_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
inline_array: base_params.inline_array = base_params.inline_array_default,
chunks: base_params.chunks = base_params.native_chunks,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# gwss track
fig1 = self.plot_g123_gwss_track(
Expand Down Expand Up @@ -472,6 +474,8 @@ def plot_g123_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# combine plots into a single figure
Expand Down
97 changes: 97 additions & 0 deletions malariagen_data/anoph/genome_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ def plot_genes(
x_range: Optional[gplt_params.x_range] = None,
title: Optional[gplt_params.title] = None,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -408,6 +410,101 @@ def plot_genes(
line_width=0,
)

if gene_labels:
debug("determine new figure height and range to accommodate gene labels")

# Increase the figure height by a certain factor, to accommodate labels.
height_increase_factor = 1.3
fig.height = int(fig.height * height_increase_factor)

# Get the original y_range.
# Note: fig.y_range is not subscriptable.
orig_y_range = fig.y_range.start, fig.y_range.end

# Determine the midpoint of the original range, to rescale outward from there.
orig_mid_y_range = (orig_y_range[0] + orig_y_range[1]) / 2
orig_y_range_extent = orig_y_range[1] - orig_y_range[0]

# Determine the new start and end points of the extended range.
new_y_range_extent = orig_y_range_extent * height_increase_factor
new_y_range_extent_half = new_y_range_extent / 2
new_y_start = orig_mid_y_range - new_y_range_extent_half
new_y_end = orig_mid_y_range + new_y_range_extent_half

# Set the new y_range.
fig.y_range = bokeh.models.Range1d(new_y_start, new_y_end)

debug("determine midpoint of each gene rectangle")
data["mid_x"] = (data["start"] + data["end"]) / 2

debug("make gene labels and pointers")

# Put gene_labels into a new column, where the gene_id matches.
# Fill unmapped genes with empty strings, otherwise "NaN" would be displayed.
data["gene_label"] = data["ID"].map(gene_labels).fillna("")

# Put gene pointers in a new column: ▼ for genes on the + strand, ▲ otherwise.
# Genes whose gene_label is empty get an empty pointer, so nothing is drawn for them.
data["gene_pointer"] = data.apply(
lambda row: ("▼" if row["strand"] == "+" else "▲")
if row["gene_label"]
else "",
axis=1,
)

# Put the pointer above or below the gene rectangle, depending on + or - strand.
neg_strand_pointer_y = orig_mid_y_range - 1.1
pos_strand_pointer_y = orig_mid_y_range + 1.1
data["pointer_y"] = data["strand"].apply(
lambda strand: pos_strand_pointer_y
if strand == "+"
else neg_strand_pointer_y
)

# Put the label above or below the gene rectangle, depending on + or - strand.
neg_strand_label_y = orig_mid_y_range - 1.25
pos_strand_label_y = orig_mid_y_range + 1.3
data["label_y"] = data["strand"].apply(
lambda strand: pos_strand_label_y
if strand == "+"
else neg_strand_label_y
)

# Get the data as a ColumnDataSource.
data_as_cds = bokeh.models.ColumnDataSource(data)

# Create a LabelSet for the gene pointers.
gene_pointers_ls = bokeh.models.LabelSet(
source=data_as_cds,
x="mid_x",
y="pointer_y",
text="gene_pointer",
text_align="center",
text_baseline="middle",
text_font_size="9pt",
text_color="#444444",
)

# Create a LabelSet for the gene labels.
gene_labels_ls = bokeh.models.LabelSet(
source=data_as_cds,
x="mid_x",
y="label_y",
text="gene_label",
text_align="left",
text_baseline="middle",
text_font_size="9pt",
text_color="#444444",
x_offset=8,
)

# Add the markers and labels to the figure.
fig.add_layout(gene_pointers_ls)
fig.add_layout(gene_labels_ls)

if gene_labelset:
fig.add_layout(gene_labelset)

debug("tidy up the plot")
fig.ygrid.visible = False
yticks = [0.4, 1.4]
Expand Down
10 changes: 10 additions & 0 deletions malariagen_data/anoph/gplt_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,13 @@
# Default contig colours, taken from bokeh's d3 "Category20b" palette (the [5] entry).
contig_colors_default: Final[contig_colors] = list(bokeh.palettes.d3["Category20b"][5])

colors: TypeAlias = Annotated[Sequence[str], "List of colors."]

# Optional custom labels for the genes track.
# plot_genes() applies this mapping to the "ID" column of the gene annotations,
# so keys are matched against gene IDs; genes without an entry get no label.
gene_labels: TypeAlias = Annotated[
Mapping[str, str],
"A mapping of gene identifiers to custom labels, which will appear in the plot.",
]

# Optional caller-built bokeh LabelSet.
# plot_genes() adds it to the genes figure unchanged via fig.add_layout(),
# independently of whether gene_labels is also given.
gene_labelset: TypeAlias = Annotated[
bokeh.models.LabelSet,
"A LabelSet to use in the plot.",
]
12 changes: 12 additions & 0 deletions malariagen_data/anoph/h12.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ def plot_h12_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot GWSS track.
fig1 = self.plot_h12_gwss_track(
Expand Down Expand Up @@ -508,6 +510,8 @@ def plot_h12_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Combine plots into a single figure.
Expand Down Expand Up @@ -674,6 +678,8 @@ def plot_h12_gwss_multi_overlay(
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot GWSS track.
fig1 = self.plot_h12_gwss_multi_overlay_track(
Expand Down Expand Up @@ -710,6 +716,8 @@ def plot_h12_gwss_multi_overlay(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Combine plots into a single figure.
Expand Down Expand Up @@ -755,6 +763,8 @@ def plot_h12_gwss_multi_panel(
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
cohort_queries = self._setup_cohort_queries(
cohorts=cohorts,
Expand Down Expand Up @@ -807,6 +817,8 @@ def plot_h12_gwss_multi_panel(
x_range=figs[0].x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

figs.append(fig2)
Expand Down
4 changes: 4 additions & 0 deletions malariagen_data/anoph/h1x.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ def plot_h1x_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot GWSS track.
fig1 = self.plot_h1x_gwss_track(
Expand Down Expand Up @@ -341,6 +343,8 @@ def plot_h1x_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Combine plots into a single figure.
Expand Down
4 changes: 4 additions & 0 deletions malariagen_data/anoph/snp_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,8 @@ def plot_snps(
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
max_snps: int = 200_000,
show: gplt_params.show = True,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# Plot SNPs track.
fig1 = self.plot_snps_track(
Expand All @@ -1330,6 +1332,8 @@ def plot_snps(
height=genes_height,
x_range=fig1.x_range,
show=False,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# Layout tracks in a grid.
Expand Down
16 changes: 16 additions & 0 deletions malariagen_data/anopheles.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ def plot_heterozygosity(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -491,6 +493,8 @@ def plot_heterozygosity(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)
figs.append(fig_genes)

Expand Down Expand Up @@ -726,6 +730,8 @@ def plot_roh(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
debug = self._log.debug

Expand Down Expand Up @@ -798,6 +804,8 @@ def plot_roh(
x_range=fig_het.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)
figs.append(fig_genes)

Expand Down Expand Up @@ -2290,6 +2298,8 @@ def plot_xpehh_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# gwss track
fig1 = self.plot_xpehh_gwss_track(
Expand Down Expand Up @@ -2334,6 +2344,8 @@ def plot_xpehh_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# combine plots into a single figure
Expand Down Expand Up @@ -2391,6 +2403,8 @@ def plot_ihs_gwss(
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
gene_labels: Optional[gplt_params.gene_labels] = None,
gene_labelset: Optional[gplt_params.gene_labelset] = None,
) -> gplt_params.figure:
# gwss track
fig1 = self.plot_ihs_gwss_track(
Expand Down Expand Up @@ -2437,6 +2451,8 @@ def plot_ihs_gwss(
x_range=fig1.x_range,
show=False,
output_backend=output_backend,
gene_labels=gene_labels,
gene_labelset=gene_labelset,
)

# combine plots into a single figure
Expand Down
Loading

0 comments on commit 6c27d20

Please sign in to comment.