Skip to content

Commit

Permalink
WIP: ImpData
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Feb 25, 2024
1 parent 0cb2daa commit 7608cb4
Showing 1 changed file with 16 additions and 15 deletions.
31 changes: 16 additions & 15 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,18 @@ class ImpData:
def __post_init__(self):
assert len(self.site_pos) == self.alleles.shape[1], "Invalid data."
assert self.alleles.shape == self.allele_probs.shape, "Invalid data."
assert (
self.alleles.shape[0] % 2 == 0
), "Even number of query haplotypes is expected."

@property
def num_sites(self):
return len(self.site_pos)

@property
def num_individuals(self):
return self.alleles.shape[0] / 2


def remap_alleles(a):
"""
Expand Down Expand Up @@ -693,25 +700,19 @@ def compute_alt_allele_frequencies():
pass


def write_vcf(ref_ts, imputed_alleles, imputed_allele_probs, out_file, chr_name="1"):
def write_vcf(ref_ts, impdata, out_file, chr_name="1"):
"""
x = number of imputed sites
n = number of diploid individuals
q = number of query haplotypes in the individuals (2 * n)
:param tskit.TreeSequence ref_ts: Tree sequence with reference haplotypes.
:param numpy.ndarray imputed_site_pos: Physical positions of imputed sites.
:param numpy.ndarray imputed_alleles: Imputed alleles of query haplotypes.
:param numpy.ndarray imputed_allele_probs: Imputed allele probabilities of queries.
:param ImpData impdata: Object containing imputation data.
:param str out_file: Path to output VCF file.
:param str chr_name: Chromosome name.
:return: None
"""
assert imputed_alleles.shape == imputed_allele_probs.shape
assert imputed_alleles.shape % 2 == 0, "Even number of haplotypes is expected."
q = imputed_alleles.shape[0] # Number of query haplotypes
n = q / 2 # Number of diploid individuals
x = imputed_alleles.shape[1] # Number of imputed sites.
x = impdata.num_sites
n = impdata.num_individuals
_HEADER = [
"##fileformat=VCFv4.2",
"##INFO=<ID=AF,Number=A,Type=Float,"
Expand Down Expand Up @@ -750,10 +751,10 @@ def write_vcf(ref_ts, imputed_alleles, imputed_allele_probs, out_file, chr_name=
f.write(col_name + "\t")
for i in range(x):
gt_probs, dosages = compute_individual_scores(
imputed_alleles[i],
imputed_allele_probs[i],
imputed_alleles[i + 1],
imputed_allele_probs[i + 1],
impdata.alleles[i],
impdata.allele_probs[i],
impdata.alleles[i + 1],
impdata.allele_probs[i + 1],
)
ar2 = compute_estimated_allelic_r_squared(gt_probs)
# QUAL
Expand All @@ -767,7 +768,7 @@ def write_vcf(ref_ts, imputed_alleles, imputed_allele_probs, out_file, chr_name=
# FORMAT
format_str = ""
for j in range(n):
format_str += imputed_alleles[j] + "|" + imputed_alleles[j + 1] + ":"
format_str += impdata.alleles[j] + "|" + impdata.alleles[j + 1] + ":"
format_str += gt_probs[j] + ":"
format_str += dosages[j]
print(info_str)
Expand Down

0 comments on commit 7608cb4

Please sign in to comment.