Skip to content

Commit

Permalink
Update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Aug 20, 2023
1 parent 101b1ad commit 4f640b6
Showing 1 changed file with 50 additions and 28 deletions.
78 changes: 50 additions & 28 deletions python/tests/test_beagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np


def convert_to_genetic_map_position(site_pos):
def convert_to_genetic_map_position(pos):
"""
Convert genomic site positions (bp) to
genetic map positions (cM).
Expand All @@ -14,11 +14,11 @@ def convert_to_genetic_map_position(site_pos):
This trivial function is meant for documentation.
:param numpy.ndarray site_pos: Genomic site positions (bp).
:param numpy.ndarray pos: Site positions (bp).
:return: Genetic map positions (cM)
:rtype: numpy.ndarray
"""
return site_pos / 1e6
return pos / 1e6


def get_weights(ref_pos, target_pos):
Expand All @@ -40,8 +40,8 @@ def get_weights(ref_pos, target_pos):
(within a specified max genetic distance). Here,
each marker is treated separately.
:param numpy.ndarray: Genomic site positions of ref. markers.
:param numpy.ndarray: Genomic site positions of target markers.
:param numpy.ndarray: Site positions of ref. markers (bp).
:param numpy.ndarray: Site positions of target markers (bp).
:return: Weights used in linear interpolation.
:rtype: numpy.ndarray
"""
Expand Down Expand Up @@ -72,11 +72,11 @@ def get_mismatch_prob(pos, miscall_rate=0.0001):
Set mismatch probabilities as per BEAGLE 4.1.
In BEAGLE 4.1, the mismatch probability is dominated by
allelic miscall rate. By default, it is set to 0.0001.
the allelic miscall rate. By default, it is set to 0.0001.
:param pos: Genomic site positions.
:param miscall_rate: Allelic miscall rate.
:return: Mismatch probabilities at each site.
:param numpy.ndarray pos: Site positions (bp).
:param float miscall_rate: Allelic miscall rate.
:return: Mismatch probabilities, one per site.
:rtype: numpy.ndarray
"""
assert 0 <= miscall_rate <= 0.5
Expand All @@ -88,10 +88,13 @@ def get_switch_prob(pos, ref_h, ne=1e6):
"""
Set switch probabilities as per BEAGLE 4.1.
:param pos: Genomic site positions.
:param ref_h: Panel of reference haplotypes.
:param ne: Effective population size.
:return: Switch probabilities at each site.
ref_h is a panel of reference haplotypes of size m x h,
where m is the number of sites and h is the number of haplotypes.
:param numpy.ndarray pos: Genomic site positions.
:param numpy.ndarray ref_h: Panel of reference haplotypes.
:param float ne: Effective population size.
:return: Switch probabilities, one per site.
:rtype: numpy.ndarray
"""
assert pos[0] > 0, "Site positions are not 1-based."
Expand All @@ -106,20 +109,27 @@ def get_switch_prob(pos, ref_h, ne=1e6):

def compute_forward_probability_matrix(ref_h, query_h, rho, mu):
"""
Implement the forward algorithm in BEAGLE 4.1.
Implement the forwards algorithm in BEAGLE 4.1.
ref_h is a panel of reference haplotypes of size m x h,
where m is the number of genotyped markers
and h is the number of haplotypes.
query_h contains only genotyped markers.
:param ref_h: Panel of reference haplotypes.
:param query_h: One query haplotype.
:param rho: Per-site switch probability.
:param mu: Per-site mismatch probability.
:param numpy.ndarray ref_h: Panel of reference haplotypes.
:param numpy.ndarray query_h: One query haplotype.
:param numpy.ndarray rho: Switch probabilities, one per site.
:param numpy.ndarray mu: Mismatch probabilities, one per site.
:return: Forward probability matrix.
:rtype: numpy.ndarray
"""
assert ref_h.shape[0] == len(query_h)
assert ref_h.shape[0] == len(rho) == len(mu)
assert np.max(rho) <= 1 and np.min(rho) >= 0
assert ref_h.shape[0] == len(rho)
assert ref_h.shape[0] == len(mu)
assert 0 <= np.min(rho) and np.max(rho) <= 1
# BEAGLE caps mismatch probabilities at 0.5.
assert np.max(mu) <= 0.5 and np.min(mu) >= 0
assert 0 <= np.min(mu) and np.max(mu) <= 0.5
m = ref_h.shape[0]
h = ref_h.shape[1]
fm = np.zeros((m, h), dtype=np.float64)
Expand All @@ -144,14 +154,15 @@ def compute_forward_probability_matrix(ref_h, query_h, rho, mu):

def compute_backward_probability_matrix(ref_h, query_h, rho, mu):
"""
Implement the backward algorithm in BEAGLE 4.1.
Implement the backwards algorithm of BEAGLE 4.1.
In BEAGLE, the values are kept one site at a time.
In BEAGLE 4.1, the values are kept one site at a time.
Here, we keep the values at all the sites.
:param ref_h: Panel of reference haplotypes.
:param query_h: One query haplotype.
:param rho: Per-site switch probability.
:param mu: Per-site mismatch probability.
:param numpy.ndarray ref_h: Panel of reference haplotypes.
:param numpy.ndarray query_h: One query haplotype.
:param numpy.ndarray rho: Switch probability for each site.
:param numpy.ndarray mu: Mismatch probability for each site.
:return: Backward probability matrix.
:rtype: numpy.ndarray
"""
Expand Down Expand Up @@ -182,7 +193,18 @@ def compute_backward_probability_matrix(ref_h, query_h, rho, mu):

def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
"""
Compute state probability matrix.
Compute HMM state probability matrix.
A HMM state is an index of a reference haplotype.
:param numpy.ndarray fm: Forward probability matrix.
:param numpy.ndarray bm: Backward probability matrix.
:param numpy.ndarray ref_h: Panel of reference haplotypes.
:param numpy.ndarray query_h: One query haplotype.
:param numpy.ndarray rho: Switch probability for each site.
:param numpy.ndarray mu: Mismatch probability for each site.
:return: HMM state probability matrix.
:rtype: numpy.ndarray
"""
assert ref_h.shape[0] == len(query_h)
assert ref_h.shape[0] == len(rho) == len(mu)
Expand Down

0 comments on commit 4f640b6

Please sign in to comment.