Skip to content

Commit

Permalink
use protopunica instead of pomegranate (#398)
Browse files Browse the repository at this point in the history
  • Loading branch information
alimanfoo authored May 18, 2023
1 parent 32fe830 commit 8da7b43
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 86 deletions.
16 changes: 6 additions & 10 deletions malariagen_data/anopheles.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,19 +1182,15 @@ def _roh_hmm_predict(
sample_id,
contig,
):
# conditional import, pomegranate takes a long time to install on
# linux due to lack of prebuilt wheels on PyPI
# This implementation is based on scikit-allel, but modified to use
# moving window computation of het counts.
from allel.stats.misc import tabulate_state_blocks

# this implementation is based on scikit-allel, but modified to use
# moving window computation of het counts
from allel.stats.roh import _hmm_derive_transition_matrix

# noinspection PyUnresolvedReferences
from pomegranate import ( # pyright: ignore
HiddenMarkovModel,
PoissonDistribution,
)
# Protopunica is pomegranate frozen at version 0.14.8, which is compatible
# with the code here. Also protopunica has binary wheels available from
# PyPI and so installs much faster.
from protopunica import HiddenMarkovModel, PoissonDistribution

# het probabilities
het_px = np.concatenate([(phet_roh,), phet_nonroh])
Expand Down
66 changes: 29 additions & 37 deletions notebooks/plot_heterozygosity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,19 @@
"af1"
]
},
{
"cell_type": "markdown",
"id": "fcf38271",
"metadata": {},
"source": [
"Disable ROH plotting for now, because requires pomegranate and installation is sloooow, so would slow down CI a lot."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b2bb835",
"metadata": {},
"outputs": [],
"source": [
"# ag3.plot_roh(\n",
"# sample=\"AK0050-C\",\n",
"# region=\"3R\",\n",
"# site_mask=\"gamb_colu\",\n",
"# window_size=20_000,\n",
"# );"
"ag3.plot_roh(\n",
" sample=\"AK0050-C\",\n",
" region=\"3R\",\n",
" site_mask=\"gamb_colu\",\n",
" window_size=20_000,\n",
")"
]
},
{
Expand All @@ -62,12 +54,12 @@
"metadata": {},
"outputs": [],
"source": [
"# ag3.plot_roh(\n",
"# sample=\"AK0050-C\",\n",
"# region=\"3R\",\n",
"# site_mask=\"gamb_colu\",\n",
"# window_size=10_000,\n",
"# );"
"ag3.plot_roh(\n",
" sample=\"AK0050-C\",\n",
" region=\"3R\",\n",
" site_mask=\"gamb_colu\",\n",
" window_size=10_000,\n",
")"
]
},
{
Expand All @@ -77,12 +69,12 @@
"metadata": {},
"outputs": [],
"source": [
"# ag3.plot_roh(\n",
"# sample=\"AB0085-Cx\",\n",
"# region=\"3R\",\n",
"# site_mask=\"gamb_colu\",\n",
"# window_size=10_000,\n",
"# );"
"ag3.plot_roh(\n",
" sample=\"AB0085-Cx\",\n",
" region=\"3R\",\n",
" site_mask=\"gamb_colu\",\n",
" window_size=10_000,\n",
")"
]
},
{
Expand All @@ -92,12 +84,12 @@
"metadata": {},
"outputs": [],
"source": [
"# ag3.plot_roh(\n",
"# sample=\"AD0305-C\",\n",
"# region=\"3R\",\n",
"# site_mask=\"gamb_colu\",\n",
"# window_size=10_000,\n",
"# );"
"ag3.plot_roh(\n",
" sample=\"AD0305-C\",\n",
" region=\"3R\",\n",
" site_mask=\"gamb_colu\",\n",
" window_size=10_000,\n",
")"
]
},
{
Expand All @@ -120,7 +112,7 @@
"ag3.plot_heterozygosity(\n",
" sample=\"AB0085-Cx\",\n",
" region=\"3R\",\n",
");"
")"
]
},
{
Expand All @@ -135,7 +127,7 @@
" region=\"3R\",\n",
" site_mask=\"gamb_colu\",\n",
" window_size=30_000,\n",
");"
")"
]
},
{
Expand All @@ -158,7 +150,7 @@
"af1.plot_heterozygosity(\n",
" sample=\"VBS24195\",\n",
" region=\"3RL\",\n",
");"
")"
]
},
{
Expand All @@ -173,7 +165,7 @@
" region=\"3RL\",\n",
" site_mask=\"funestus\",\n",
" window_size=30_000,\n",
");"
")"
]
},
{
Expand Down Expand Up @@ -201,7 +193,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
144 changes: 105 additions & 39 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ jupyter-dash = "*"
numpydoc_decorator = ">=2.0.0"
typing_extensions = "*"
typeguard = ">=4.0.0"
protopunica = "*"

[tool.poetry.dev-dependencies]
pytest = "*"
Expand Down

0 comments on commit 8da7b43

Please sign in to comment.