Skip to content

Commit

Permalink
Rework compute_alt_allele_frequencies
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Feb 26, 2024
1 parent 53ab820 commit 0bed81b
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,18 +723,34 @@ def compute_dosage_r_squared():
pass


def compute_alt_allele_frequencies(gt_probs):
def compute_allele_frequency(gt_probs, allele=1):
"""
Estimate allele frequencies from posterior genotype probabilities.
Assume the site is biallelic. 0 denotes the major allele, and 1 the minor allele.
Input are the posterior genotype probabilities at a site for n diploid individuals:
P(00), P(01 or 10), and P(11). The matrix is of size (n, 3).
See the note in "Standardized Allele-Frequency Error" in Browning & Browning (2009).
Am J Hum Genet. 84(2): 210–223. doi: 10.1016/j.ajhg.2009.01.005.
In BEAGLE 4.1, AF: "Estimated ALT Allele Frequencies".
:param np.ndarray gt_probs: Genotype probabilities at an imputed site.
:return: Estimated ALT allele frequency.
:param np.ndarray gt_probs: Genotype probabilities at a site.
:param int allele: Allele (default = 1).
:return: Estimated allele frequency.
:rtype: float
"""
expected_dosage_0 = 2 * np.sum(gt_probs[:, 0]) + np.sum(gt_probs[:, 1])
expected_dosage_1 = np.sum(gt_probs[:, 1]) + 2 * np.sum(gt_probs[:, 2])
est_af_1 = expected_dosage_1 / (expected_dosage_0 + expected_dosage_1)
return est_af_1
n = len(gt_probs)
assert n > 0, "There must be at least one individual."
assert allele in [0, 1], f"Allele {allele} is not recognized."
if allele == 0:
est_allele_count_0 = 2 * np.sum(gt_probs[:, 0]) + np.sum(gt_probs[:, 1])
return est_allele_count_0 / (2 * n)
else:
est_allele_count_1 = np.sum(gt_probs[:, 1]) + 2 * np.sum(gt_probs[:, 2])
return est_allele_count_1 / (2 * n)


def write_vcf(ref_ts, impdata, out_file, chr_name="1"):
Expand Down Expand Up @@ -807,7 +823,7 @@ def write_vcf(ref_ts, impdata, out_file, chr_name="1"):
line_str += "PASS" + "\t"
# INFO
dr2 = round(-1, 2) # TODO
af = round(compute_alt_allele_frequencies(gt_probs), 2)
af = round(compute_allele_frequency(gt_probs, allele=1), 2)
info_str = f"AR2={ar2};DR2={dr2};AF={af};IMP"
line_str += info_str + "\t"
# FORMAT
Expand Down

0 comments on commit 0bed81b

Please sign in to comment.