diff --git a/lshmm/api.py b/lshmm/api.py index 8a5faec..6137480 100644 --- a/lshmm/api.py +++ b/lshmm/api.py @@ -105,48 +105,18 @@ def checks( return n, m, ploidy -# def forwards(n, m, G_or_H, s, e, r): -# """ -# Run the Li and Stephens forwards algorithm on haplotype or -# unphased genotype data. -# """ -# template_dimensions = G_or_H.shape -# assert len(template_dimensions) in [2, 3] - -# if len(template_dimensions) == 2: -# # Haploid -# assert (G_or_H.shape == np.array([m, n])).all() -# F, c, ll = forwards_ls_hap(n, m, G_or_H, s, e, r, norm=True) -# else: -# # Diploid -# assert (G_or_H.shape == np.array([m, n, n])).all() -# F, c, ll = forward_ls_dip_loop(n, m, G_or_H, s, e, r, norm=True) - -# return F, c, ll - - -def forwards( +def set_emission_probabilities( + n, + m, reference_panel, query, - recombination_rate, - alleles=None, - mutation_rate=None, - scale_mutation_based_on_n_alleles=True, + alleles, + mutation_rate, + ploidy, + scale_mutation_based_on_n_alleles, ): - """ - Run the Li and Stephens forwards algorithm on haplotype or - unphased genotype data. - """ - n, m, ploidy = checks( - reference_panel, - query, - mutation_rate, - recombination_rate, - scale_mutation_based_on_n_alleles, - ) # Check alleles should go in here, and modify e before passing to the algorithm # If alleles is not passed, we don't perform a test of alleles, but set n_alleles based on the reference_panel. - if alleles is None: n_alleles = np.int8( [ @@ -187,14 +157,6 @@ def forwards( else: e[j, 0] = mutation_rate[j] / (n_alleles[j] - 1) e[j, 1] = 1 - mutation_rate[j] - - ( - forward_array, - normalisation_factor_from_forward, - log_likelihood, - ) = forwards_ls_hap( - n, m, reference_panel, query, e, recombination_rate, norm=True - ) else: # Diploid # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. @@ -206,50 +168,43 @@ def forwards( e[:, REF_HOM_OBS_HET] = 2 * mutation_rate * (1 - mutation_rate) e[:, REF_HET_OBS_HOM] = mutation_rate * (1 - mutation_rate) - ( - forward_array, - normalisation_factor_from_forward, - log_likelihood, - ) = forward_ls_dip_loop( - n, m, reference_panel, query, e, recombination_rate, norm=True - ) + return e - return forward_array, normalisation_factor_from_forward, log_likelihood +def viterbi_hap(n, m, reference_panel, query, emissions, recombination_rate): -# def backwards(n, m, G_or_H, s, e, c, r): -# """ -# Run the Li and Stephens backwards algorithm on haplotype or -# unphased genotype data. -# """ -# template_dimensions = G_or_H.shape -# assert len(template_dimensions) in [2, 3] + V, P, log_likelihood = forwards_viterbi_hap_lower_mem_rescaling( + n, m, reference_panel, query, emissions, recombination_rate + ) + most_likely_path = backwards_viterbi_hap(m, V, P) -# if len(template_dimensions) == 2: -# # Haploid -# assert (G_or_H.shape == np.array([m, n])).all() -# B = backwards_ls_hap(n, m, G_or_H, s, e, c, r) -# else: -# # Diploid -# assert (G_or_H.shape == np.array([m, n, n])).all() -# B = backward_ls_dip_loop(n, m, G_or_H, s, e, c, r) + return most_likely_path, log_likelihood -# return B +def viterbi_dip(n, m, reference_panel, query, emissions, recombination_rate): -def backwards( + V, P, log_likelihood = forwards_viterbi_dip_low_mem( + n, m, reference_panel, query, emissions, recombination_rate + ) + unphased_path = backwards_viterbi_dip(m, V, P) + most_likely_path = get_phased_path(n, unphased_path) + + return most_likely_path, log_likelihood + + +def forwards( reference_panel, query, - normalisation_factor_from_forward, recombination_rate, alleles=None, mutation_rate=None, scale_mutation_based_on_n_alleles=True, ): """ - Run the Li and Stephens backwards algorithm on haplotype or + Run the Li and Stephens forwards algorithm on haplotype or unphased genotype data. """ + n, m, ploidy = checks( reference_panel, query, @@ -257,100 +212,82 @@ def backwards( recombination_rate, scale_mutation_based_on_n_alleles, ) - # Check alleles should go in here, and mofify e before passing to the algorithm - # If alleles is not passed, we don't perform a test of alleles, but set n_alleles based on the reference_panel. - if alleles is None: - n_alleles = np.int8( - [ - len(np.unique(np.append(reference_panel[j, :], query[:, j]))) - for j in range(reference_panel.shape[0]) - ] - ) - else: - n_alleles = check_alleles(alleles, m) - if mutation_rate is None: - # Set the mutation rate to be the proposed mutation rate in Li and Stephens (2003). - theta_tilde = 1 / np.sum([1 / k for k in range(1, n - 1)]) - mutation_rate = 0.5 * (theta_tilde / (n + theta_tilde)) + emissions = set_emission_probabilities( + n, + m, + reference_panel, + query, + alleles, + mutation_rate, + ploidy, + scale_mutation_based_on_n_alleles, + ) if ploidy == 1: - # Haploid - # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. - e = np.zeros((m, 2)) - - if scale_mutation_based_on_n_alleles: - # Scale mutation based on the number of alleles - so the mutation rate is the mutation rate to one of the alleles. - # The overall mutation rate is then (n_alleles - 1) * mutation_rate. - e[:, 0] = mutation_rate - mutation_rate * np.equal( - n_alleles, np.ones(m) - ) # Added boolean in case we're at an invariant site - e[:, 1] = 1 - (n_alleles - 1) * mutation_rate - else: - # No scaling based on the number of alleles - so the mutation rate is the mutation rate to anything. - # Which means that we must rescale the mutation rate to a different allele, by the number of alleles. - for j in range(m): - if n_alleles[j] == 1: # In case we're at an invariant site - e[j, 0] = 0 - e[j, 1] = 1 - else: - e[j, 0] = mutation_rate[j] / (n_alleles[j] - 1) - e[j, 1] = 1 - mutation_rate[j] - - backwards_array = backwards_ls_hap( - n, - m, - reference_panel, - query, - e, - normalisation_factor_from_forward, - recombination_rate, - ) + forward_function = forwards_ls_hap else: - # Diploid - # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. - # DEV: there's a wrinkle here. - e = np.zeros((m, 8)) - e[:, EQUAL_BOTH_HOM] = (1 - mutation_rate) ** 2 - e[:, UNEQUAL_BOTH_HOM] = mutation_rate ** 2 - e[:, BOTH_HET] = (1 - mutation_rate) ** 2 + mutation_rate ** 2 - e[:, REF_HOM_OBS_HET] = 2 * mutation_rate * (1 - mutation_rate) - e[:, REF_HET_OBS_HOM] = mutation_rate * (1 - mutation_rate) + forward_function = forward_ls_dip_loop + + ( + forward_array, + normalisation_factor_from_forward, + log_likelihood, + ) = forward_function( + n, m, reference_panel, query, emissions, recombination_rate, norm=True + ) - backwards_array = backward_ls_dip_loop( - n, - m, - reference_panel, - query, - e, - normalisation_factor_from_forward, - recombination_rate, - ) + return forward_array, normalisation_factor_from_forward, log_likelihood - return backwards_array +def backwards( + reference_panel, + query, + normalisation_factor_from_forward, + recombination_rate, + alleles=None, + mutation_rate=None, + scale_mutation_based_on_n_alleles=True, +): + """ + Run the Li and Stephens backwards algorithm on haplotype or + unphased genotype data. + """ + n, m, ploidy = checks( + reference_panel, + query, + mutation_rate, + recombination_rate, + scale_mutation_based_on_n_alleles, + ) + + emissions = set_emission_probabilities( + n, + m, + reference_panel, + query, + alleles, + mutation_rate, + ploidy, + scale_mutation_based_on_n_alleles, + ) -# def viterbi(n, m, G_or_H, s, e, r): -# """ -# Run the Li and Stephens Viterbi algorithm on haplotype or -# unphased genotype data. -# """ -# template_dimensions = G_or_H.shape -# assert len(template_dimensions) in [2, 3] + if ploidy == 1: + backward_function = backwards_ls_hap + else: + backward_function = backward_ls_dip_loop -# if len(template_dimensions) == 2: -# # Haploid -# assert (G_or_H.shape == np.array([m, n])).all() -# V, P, ll = forwards_viterbi_hap_lower_mem_rescaling(n, m, G_or_H, s, e, r) -# path = backwards_viterbi_hap(m, V, P) -# else: -# # Diploid -# assert (G_or_H.shape == np.array([m, n, n])).all() -# V, P, ll = forwards_viterbi_dip_low_mem(n, m, G_or_H, s, e, r) -# unphased_path = backwards_viterbi_dip(m, V, P) -# path = get_phased_path(n, unphased_path) + backwards_array = backward_function( + n, + m, + reference_panel, + query, + emissions, + normalisation_factor_from_forward, + recombination_rate, + ) -# return path, ll + return backwards_array def viterbi( @@ -373,71 +310,29 @@ def viterbi( scale_mutation_based_on_n_alleles, ) - if alleles is None: - n_alleles = np.int8( - [ - len(np.unique(np.append(reference_panel[j, :], query[:, j]))) - for j in range(reference_panel.shape[0]) - ] - ) - else: - n_alleles = check_alleles(alleles, m) - - if mutation_rate is None: - # Set the mutation rate to be the proposed mutation rate in Li and Stephens (2003). - theta_tilde = 1 / np.sum([1 / k for k in range(1, n - 1)]) - mutation_rate = 0.5 * (theta_tilde / (n + theta_tilde)) + emissions = set_emission_probabilities( + n, + m, + reference_panel, + query, + alleles, + mutation_rate, + ploidy, + scale_mutation_based_on_n_alleles, + ) if ploidy == 1: - # Haploid - # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. - e = np.zeros((m, 2)) - - if scale_mutation_based_on_n_alleles: - # Scale mutation based on the number of alleles - so the mutation rate is the mutation rate to one of the alleles. - # The overall mutation rate is then (n_alleles - 1) * mutation_rate. - e[:, 0] = mutation_rate - mutation_rate * np.equal( - n_alleles, np.ones(m) - ) # Added boolean in case we're at an invariant site - e[:, 1] = 1 - (n_alleles - 1) * mutation_rate - else: - # No scaling based on the number of alleles - so the mutation rate is the mutation rate to anything. - # Which means that we must rescale the mutation rate to a different allele, by the number of alleles. - for j in range(m): - if n_alleles[j] == 1: # In case we're at an invariant site - e[j, 0] = 0 - e[j, 1] = 1 - else: - e[j, 0] = mutation_rate[j] / (n_alleles[j] - 1) - e[j, 1] = 1 - mutation_rate[j] - - V, P, log_likelihood = forwards_viterbi_hap_lower_mem_rescaling( - n, m, reference_panel, query, e, recombination_rate - ) - most_likely_path = backwards_viterbi_hap(m, V, P) + viterbi_function = viterbi_hap else: - # Diploid - # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. - # DEV: there's a wrinkle here. - e = np.zeros((m, 8)) - e[:, EQUAL_BOTH_HOM] = (1 - mutation_rate) ** 2 - e[:, UNEQUAL_BOTH_HOM] = mutation_rate ** 2 - e[:, BOTH_HET] = (1 - mutation_rate) ** 2 + mutation_rate ** 2 - e[:, REF_HOM_OBS_HET] = 2 * mutation_rate * (1 - mutation_rate) - e[:, REF_HET_OBS_HOM] = mutation_rate * (1 - mutation_rate) + viterbi_function = viterbi_dip - V, P, log_likelihood = forwards_viterbi_dip_low_mem( - n, m, reference_panel, query, e, recombination_rate - ) - unphased_path = backwards_viterbi_dip(m, V, P) - most_likely_path = get_phased_path(n, unphased_path) + most_likely_path, log_likelihood = viterbi_function( + n, m, reference_panel, query, emissions, recombination_rate + ) return most_likely_path, log_likelihood -# Finally, need to include a function to evaluate the likelihood of a given path - - def path_ll( reference_panel, query, @@ -455,58 +350,25 @@ def path_ll( recombination_rate, scale_mutation_based_on_n_alleles, ) - # Check alleles should go in here, and mofify e before passing to the algorithm - # If alleles is not passed, we don't perform a test of alleles, but set n_alleles based on the reference_panel. - if alleles is None: - n_alleles = np.int8( - [ - len(np.unique(np.append(reference_panel[j, :], query[:, j]))) - for j in range(reference_panel.shape[0]) - ] - ) - else: - n_alleles = check_alleles(alleles, m) - if mutation_rate is None: - # Set the mutation rate to be the proposed mutation rate in Li and Stephens (2003). - theta_tilde = 1 / np.sum([1 / k for k in range(1, n - 1)]) - mutation_rate = 0.5 * (theta_tilde / (n + theta_tilde)) + emissions = set_emission_probabilities( + n, + m, + reference_panel, + query, + alleles, + mutation_rate, + ploidy, + scale_mutation_based_on_n_alleles, + ) if ploidy == 1: - # Haploid - # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. - e = np.zeros((m, 2)) - - if scale_mutation_based_on_n_alleles: - # Scale mutation based on the number of alleles - so the mutation rate is the mutation rate to one of the alleles. - # The overall mutation rate is then (n_alleles - 1) * mutation_rate. - e[:, 0] = mutation_rate - mutation_rate * np.equal( - n_alleles, np.ones(m) - ) # Added boolean in case we're at an invariant site - e[:, 1] = 1 - (n_alleles - 1) * mutation_rate - else: - # No scaling based on the number of alleles - so the mutation rate is the mutation rate to anything. - # Which means that we must rescale the mutation rate to a different allele, by the number of alleles. - for j in range(m): - if n_alleles[j] == 1: # In case we're at an invariant site - e[j, 0] = 0 - e[j, 1] = 1 - else: - e[j, 0] = mutation_rate[j] / (n_alleles[j] - 1) - e[j, 1] = 1 - mutation_rate[j] - - ll = path_ll_hap(n, m, reference_panel, path, query, e, recombination_rate) + path_ll_function = path_ll_hap else: - # Diploid - # Evaluate emission probabilities here, using the mutation rate - this can take a scalar or vector. - # DEV: there's a wrinkle here. - e = np.zeros((m, 8)) - e[:, EQUAL_BOTH_HOM] = (1 - mutation_rate) ** 2 - e[:, UNEQUAL_BOTH_HOM] = mutation_rate ** 2 - e[:, BOTH_HET] = (1 - mutation_rate) ** 2 + mutation_rate ** 2 - e[:, REF_HOM_OBS_HET] = 2 * mutation_rate * (1 - mutation_rate) - e[:, REF_HET_OBS_HOM] = mutation_rate * (1 - mutation_rate) + path_ll_function = path_ll_dip - ll = path_ll_dip(n, m, reference_panel, path, query, e, recombination_rate) + ll = path_ll_function( + n, m, reference_panel, path, query, emissions, recombination_rate + ) return ll diff --git a/lshmm/forward_backward/fb_diploid_samples_variants.py b/lshmm/forward_backward/fb_diploid_samples_variants.py index dee32f2..c65ada3 100644 --- a/lshmm/forward_backward/fb_diploid_samples_variants.py +++ b/lshmm/forward_backward/fb_diploid_samples_variants.py @@ -47,7 +47,7 @@ def np_argmax(array, axis): return np_apply_along_axis(np.argmax, axis, array) -@nb.jit +@nb.njit def forwards_ls_dip(n, m, G, s, e, r, norm=True): """Matrix based diploid LS forward algorithm using numpy vectorisation.""" # Initialise the forward tensor @@ -121,7 +121,7 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): return F, c, ll -@nb.jit +@nb.njit def backwards_ls_dip(n, m, G, s, e, c, r): """Matrix based diploid LS backward algorithm using numpy vectorisation.""" # Initialise the backward tensor @@ -161,7 +161,7 @@ def backwards_ls_dip(n, m, G, s, e, c, r): return B -@nb.jit +@nb.njit def forward_ls_dip_starting_point(n, m, G, s, e, r): """Naive implementation of LS diploid forwards algorithm.""" # Initialise the forward tensor @@ -234,7 +234,7 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r): return F, ll -@nb.jit +@nb.njit def backward_ls_dip_starting_point(n, m, G, s, e, r): """Naive implementation of LS diploid backwards algorithm.""" # Backwards @@ -310,7 +310,7 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r): return B -@nb.jit +@nb.njit def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): """LS diploid forwards algoritm without vectorisation.""" # Initialise the forward tensor @@ -422,7 +422,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): return F, c, ll -@nb.jit +@nb.njit def backward_ls_dip_loop(n, m, G, s, e, c, r): """LS diploid backwards algoritm without vectorisation.""" # Initialise the backward tensor diff --git a/lshmm/forward_backward/fb_diploid_variants_samples.py b/lshmm/forward_backward/fb_diploid_variants_samples.py index e376523..231d385 100644 --- a/lshmm/forward_backward/fb_diploid_variants_samples.py +++ b/lshmm/forward_backward/fb_diploid_variants_samples.py @@ -148,10 +148,6 @@ def backwards_ls_dip(n, m, G, s, e, c, r): ) # One changes - # sum_j1 = np.tile(np.sum(B[l+1,:,:] * e[l+1, index], 0, keepdims=True), (n,1)) - # sum_j1 = np_sum(B[l + 1, :, :], 0).repeat(n).reshape((-1, n)).T - # sum_j2 = np.tile(np.sum(B[l+1,:,:] * e[l+1, index], 1, keepdims=True), (1,n)) - # sum_j2 = np_sum(B[l + 1, :, :], 1).repeat(n).reshape((-1, n)) sum_j = np_sum(B[l + 1, :, :] * e[l + 1, index], 0).repeat(n).reshape((-1, n)) B[l, :, :] += ((1 - r[l + 1]) * r_n[l + 1]) * (sum_j + sum_j.T) B[l, :, :] *= 1 / c[l + 1] @@ -159,83 +155,6 @@ def backwards_ls_dip(n, m, G, s, e, c, r): return B -# def forward_ls_dip_starting_point(n, m, G, s, e, r): -# ''' -# Unbelievably naive implementation of LS diploid forwards. Just to get something down -# that works. -# ''' - -# # Initialise the forward tensor -# F = np.zeros((m,n,n)) -# F[0,:,:] = 1/(n**2) -# index = ( -# 4*np.equal(G[0,:,:], s[0,0]).astype(np.int64) + -# 2*(G[0,:,:] == 1).astype(np.int64) + -# np.int64(s[0,0] == 1) -# ) -# F[0,:,:] *= e[0, index] -# r_n = r/n - -# for l in range(1,m): - -# # Determine the various components -# F_no_change = np.zeros((n,n)) -# F_j1_change = np.zeros(n) -# F_j2_change = np.zeros(n) -# F_both_change = 0 - -# for j1 in range(n): -# for j2 in range(n): -# F_no_change[j1, j2] = (1-r[l])**2 * F[l-1, j1, j2] - -# for j1 in range(n): -# for j2 in range(n): -# F_both_change += r_n[l]**2 * F[l-1, j1, j2] - -# for j1 in range(n): -# for j2 in range(n): # This is the variable to sum over - it changes -# F_j2_change[j1] += (1 - r[l]) * r_n[l] * F[l-1, j1, j2] - -# for j2 in range(n): -# for j1 in range(n): # This is the variable to sum over - it changes -# F_j1_change[j2] += (1 - r[l]) * r_n[l] * F[l-1, j1, j2] - -# F[l,:,:] = F_both_change - -# for j1 in range(n): -# F[l, j1, :] += F_j2_change - -# for j2 in range(n): -# F[l, :, j2] += F_j1_change - -# for j1 in range(n): -# for j2 in range(n): -# F[l, j1, j2] += F_no_change[j1, j2] - -# for j1 in range(n): -# for j2 in range(n): -# # What is the emission? -# if s[0,l] == 1: -# # OBS is het -# if G[l, j1, j2] == 1: # REF is het -# F[l, j1, j2] *= e[l,BOTH_HET] -# else: # REF is hom -# F[l, j1, j2] *= e[l,REF_HOM_OBS_HET] -# else: -# # OBS is hom -# if G[l, j1, j2] == 1: # REF is het -# F[l, j1, j2] *= e[l,REF_HET_OBS_HOM] -# else: # REF is hom -# if G[l, j1, j2] == s[0,l]: # Equal -# F[l, j1, j2] *= e[l,EQUAL_BOTH_HOM] -# else: # Unequal -# F[l, j1, j2] *= e[l,UNEQUAL_BOTH_HOM] - -# ll = np.log10(np.sum(F[l,:,:])) - -# return F, ll - - @nb.njit def forward_ls_dip_starting_point(n, m, G, s, e, r): """Naive implementation of LS diploid forwards algorithm.""" @@ -312,79 +231,6 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r): return F, ll -# def backward_ls_dip_starting_point(n, m, G, s, e, r): -# ''' -# Unbelievably naive implementation of LS diploid backwards. Just to get something down -# that works. -# ''' - -# # Backwards -# B = np.zeros((m,n,n)) - -# # Initialise -# B[m-1, :, :] = 1 -# r_n = r/n - -# for l in range(m-2, -1, -1): - -# # Determine the various components -# B_no_change = np.zeros((n,n)) -# B_j1_change = np.zeros(n) -# B_j2_change = np.zeros(n) -# B_both_change = 0 - -# # Evaluate the emission matrix at this site, for all pairs -# e_tmp = np.zeros((n,n)) -# for j1 in range(n): -# for j2 in range(n): -# # What is the emission? -# if s[0,l+1] == 1: -# # OBS is het -# if G[l+1, j1, j2] == 1: # REF is het -# e_tmp[j1, j2] = e[l+1, BOTH_HET] -# else: # REF is hom -# e_tmp[j1, j2] = e[l+1, REF_HOM_OBS_HET] -# else: -# # OBS is hom -# if G[l+1, j1, j2] == 1: # REF is het -# e_tmp[j1, j2] = e[l+1,REF_HET_OBS_HOM] -# else: # REF is hom -# if G[l+1, j1, j2] == s[0,l+1]: # Equal -# e_tmp[j1, j2] = e[l+1,EQUAL_BOTH_HOM] -# else: # Unequal -# e_tmp[j1, j2] = e[l+1,UNEQUAL_BOTH_HOM] - -# for j1 in range(n): -# for j2 in range(n): -# B_no_change[j1, j2] = (1-r[l+1])**2 * B[l+1,j1,j2] * e_tmp[j1, j2] - -# for j1 in range(n): -# for j2 in range(n): -# B_both_change += r_n[l+1]**2 * e_tmp[j1, j2] * B[l+1,j1,j2] - -# for j1 in range(n): -# for j2 in range(n): # This is the variable to sum over - it changes -# B_j2_change[j1] += (1 - r[l+1]) * r_n[l+1] * B[l+1,j1,j2] * e_tmp[j1, j2] - -# for j2 in range(n): -# for j1 in range(n): # This is the variable to sum over - it changes -# B_j1_change[j2] += (1 - r[l+1]) * r_n[l+1] * B[l+1,j1,j2] * e_tmp[j1, j2] - -# B[l,:,:] = B_both_change - -# for j1 in range(n): -# B[l, j1, :] += B_j2_change - -# for j2 in range(n): -# B[l, :, j2] += B_j1_change - -# for j1 in range(n): -# for j2 in range(n): -# B[l, j1, j2] += B_no_change[j1, j2] - -# return B - - @nb.njit def backward_ls_dip_starting_point(n, m, G, s, e, r): """Naive implementation of LS diploid backwards algorithm.""" @@ -461,68 +307,6 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r): return B -# def forward_ls_dip_loop(n, m, G, s, e, r): -# ''' -# LS diploid forwards with lots of loops. -# ''' - -# # Initialise the forward tensor -# F = np.zeros((m,n,n)) -# F[0,:,:] = 1/(n**2) -# index = ( -# 4*np.equal(G[0,:,:], s[0,0]).astype(np.int64) + -# 2*(G[0,:,:] == 1).astype(np.int64) + -# np.int64(s[0,0] == 1) -# ) -# F[0,:,:] *= e[0, index] -# r_n = r/n - -# for l in range(1,m): - -# # Determine the various components -# F_no_change = np.zeros((n,n)) -# F_j1_change = np.zeros(n) -# F_j2_change = np.zeros(n) -# F_both_change = 0 - -# for j1 in range(n): -# for j2 in range(n): -# F_no_change[j1, j2] = (1-r[l])**2 * F[l-1,j1,j2] -# F_j1_change[j1] += (1 - r[l]) * r_n[l] * F[l-1,j2,j1] -# F_j2_change[j1] += (1 - r[l]) * r_n[l] * F[l-1,j1,j2] -# F_both_change += r_n[l]**2 * F[l-1,j1,j2] - -# F[l,:,:] = F_both_change - -# for j1 in range(n): -# F[l, j1, :] += F_j2_change -# F[l, :, j1] += F_j1_change -# for j2 in range(n): -# F[l, j1, j2] += F_no_change[j1, j2] - -# for j1 in range(n): -# for j2 in range(n): -# # What is the emission? -# if s[0,l] == 1: -# # OBS is het -# if G[l, j1, j2] == 1: # REF is het -# F[l, j1, j2] *= e[l, BOTH_HET] -# else: # REF is hom -# F[l, j1, j2] *= e[l, REF_HOM_OBS_HET] -# else: -# # OBS is hom -# if G[l, j1, j2] == 1: # REF is het -# F[l, j1, j2] *= e[l, REF_HET_OBS_HOM] -# else: # REF is hom -# if G[l, j1, j2] == s[0,l]: # Equal -# F[l, j1, j2] *= e[l, EQUAL_BOTH_HOM] -# else: # Unequal -# F[l, j1, j2] *= e[l, UNEQUAL_BOTH_HOM] - -# ll = np.log10(np.sum(F[l,:,:])) -# return F, ll - - @nb.njit def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): """LS diploid forwards algoritm without vectorisation.""" @@ -637,63 +421,6 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): return F, c, ll -# def backward_ls_dip_loop(n, m, G, s, e, r): -# ''' -# LS diploid backwards with lots of loops. -# ''' - -# # Initialise the backward tensor -# B = np.zeros((m,n,n)) -# B[m-1, :, :] = 1 -# r_n = r/n - -# for l in range(m-2, -1, -1): - -# # Determine the various components -# B_no_change = np.zeros((n,n)) -# B_j1_change = np.zeros(n) -# B_j2_change = np.zeros(n) -# B_both_change = 0 - -# # Evaluate the emission matrix at this site, for all pairs -# e_tmp = np.zeros((n,n)) -# for j1 in range(n): -# for j2 in range(n): -# # What is the emission? -# if s[0,l+1] == 1: -# # OBS is het -# if G[l+1, j1, j2] == 1: # REF is het -# e_tmp[j1, j2] = e[l+1, BOTH_HET] -# else: # REF is hom -# e_tmp[j1, j2] = e[l+1, REF_HOM_OBS_HET] -# else: -# # OBS is hom -# if G[l+1, j1, j2] == 1: # REF is het -# e_tmp[j1, j2] = e[l+1, REF_HET_OBS_HOM] -# else: # REF is hom -# if G[l+1, j1, j2] == s[0,l+1]: # Equal -# e_tmp[j1, j2] = e[l+1, EQUAL_BOTH_HOM] -# else: # Unequal -# e_tmp[j1, j2] = e[l+1, UNEQUAL_BOTH_HOM] - -# for j1 in range(n): -# for j2 in range(n): -# B_no_change[j1, j2] = (1-r[l+1])**2 * B[l+1,j1,j2] * e_tmp[j1, j2] -# B_j2_change[j1] += (1 - r[l+1]) * r_n[l+1] * B[l+1,j1,j2] * e_tmp[j1, j2] -# B_j1_change[j1] += (1 - r[l+1]) * r_n[l+1] * B[l+1,j2,j1] * e_tmp[j2, j1] -# B_both_change += r_n[l+1]**2 * e_tmp[j1, j2] * B[l+1,j1,j2] - -# B[l,:,:] = B_both_change - -# for j1 in range(n): -# B[l, j1, :] += B_j2_change -# B[l, :, j1] += B_j1_change -# for j2 in range(n): -# B[l, j1, j2] += B_no_change[j1, j2] - -# return B - - @nb.njit def backward_ls_dip_loop(n, m, G, s, e, c, r): """LS diploid backwards algoritm without vectorisation.""" diff --git a/lshmm/forward_backward/fb_haploid_samples_variants.py b/lshmm/forward_backward/fb_haploid_samples_variants.py index 580f63e..683412a 100644 --- a/lshmm/forward_backward/fb_haploid_samples_variants.py +++ b/lshmm/forward_backward/fb_haploid_samples_variants.py @@ -3,7 +3,7 @@ import numpy as np -@nb.jit +@nb.njit def forwards_ls_hap(n, m, H, s, e, r, norm=True): """Matrix based haploid LS forward algorithm using numpy vectorisation.""" # Initialise @@ -39,7 +39,7 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True): return F, c, ll -@nb.jit +@nb.njit def backwards_ls_hap(n, m, H, s, e, c, r): """Matrix based haploid LS backward algorithm using numpy vectorisation.""" # Initialise diff --git a/lshmm/forward_backward/fb_haploid_variants_samples.py b/lshmm/forward_backward/fb_haploid_variants_samples.py index 0ea5b1d..f35dac1 100644 --- a/lshmm/forward_backward/fb_haploid_variants_samples.py +++ b/lshmm/forward_backward/fb_haploid_variants_samples.py @@ -2,49 +2,13 @@ import numba as nb import numpy as np -# def forwards_ls_hap(n, m, H, s, e, r, norm=True): -# ''' -# Simple matrix based method for LS forward algorithm using numpy vectorisation. -# ''' -# # Initialise -# F = np.zeros((m,n)) -# c = np.ones(m) -# F[0,:] = 1/n * e[0, np.equal(H[0, :], s[0,0]).astype(np.int64)] -# r_n = r/n - -# if norm: - -# c[0] = np.sum(F[0,:]) -# F[0,:] *= 1/c[0] - -# # Forwards pass -# for l in range(1,m): -# F[l,:] = F[l-1,:] * (1 - r[l]) + r_n[l] # Don't need to multiply by r_n[l] F[:,l-1] as we've normalised. -# F[l,:] *= e[l,np.equal(H[l,:], s[0,l]).astype(np.int64)] -# c[l] = np.sum(F[l,:]) -# F[l,:] *= 1/c[l] - -# ll = np.sum(np.log10(c)) - -# else: -# # Forwards pass -# for l in range(1,m): -# F[l,:] = F[l-1,:] * (1 - r[l]) + np.sum(F[l-1,:]) * r_n[l] -# F[l,:] *= e[l, np.equal(H[l,:], s[0,l]).astype(np.int64)] - -# ll = np.log10(np.sum(F[m-1,:])) - -# return F, c, ll - - -@nb.jit +@nb.njit def forwards_ls_hap(n, m, H, s, e, r, norm=True): """Matrix based haploid LS forward algorithm using numpy vectorisation.""" # Initialise F = np.zeros((m, n)) r_n = r / n - print("running") if norm: @@ -87,26 +51,7 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True): return F, c, ll -# def backwards_ls_hap(n, m, H, s, e, c, r): -# ''' -# Simple matrix based method for LS backwards algorithm using numpy vectorisation. -# ''' - -# # Initialise -# B = np.zeros((m,n)) -# B[m-1,:] = 1 -# r_n = r/n - -# # Backwards pass -# for l in range(m-2, -1, -1): -# B[l,:] = r_n[l+1] * np.sum(e[l+1, np.equal(H[l+1,:], s[0,l+1]).astype(np.int64)] * B[l+1,:]) -# B[l,:] += (1 - r[l+1]) * e[l+1, np.equal(H[l+1,:], s[0,l+1]).astype(np.int64)] * B[l+1,:] -# B[l,:] *= 1/c[l+1] - -# return B - - -@nb.jit +@nb.njit def backwards_ls_hap(n, m, H, s, e, c, r): """Matrix based haploid LS backward algorithm using numpy vectorisation.""" # Initialise diff --git a/lshmm/forward_backward/fb_haploid_variants_samples_multiallelic.py b/lshmm/forward_backward/fb_haploid_variants_samples_multiallelic.py deleted file mode 100644 index 171aeb8..0000000 --- a/lshmm/forward_backward/fb_haploid_variants_samples_multiallelic.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples.""" -import numba as nb -import numpy as np - - -def check_alleles(alleles, m): - """ - Checks the specified allele list and returns a list of lists - of alleles of length num_sites. - If alleles is a 1D list of strings, assume that this list is used - for each site and return num_sites copies of this list. - Otherwise, raise a ValueError if alleles is not a list of length - num_sites. - """ - if isinstance(alleles[0], str): - return np.int8([len(alleles) for _ in range(m)]) - if len(alleles) != m: - raise ValueError("Malformed alleles list") - n_alleles = np.int8([(len(alleles_site)) for alleles_site in alleles]) - return n_alleles - - -@nb.jit -def forwards_ls_hap(n, m, n_alleles, H, s, e, r, norm=True): - """Matrix based haploid LS forward algorithm using numpy vectorisation.""" - # Initialise - F = np.zeros((m, n)) - r_n = r / n - - if norm: - - c = np.zeros(m) - for i in range(n): - F[0, i] = 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]))] - c[0] += F[0, i] - - for i in range(n): - F[0, i] *= 1 / c[0] - - # Forwards pass - for l in range(1, m): - for i in range(n): - F[l, i] = F[l - 1, i] * (1 - r[l]) + r_n[l] - F[l, i] *= e[l, np.int64(np.equal(H[l, i], s[0, l]))] - c[l] += F[l, i] - - for i in range(n): - F[l, i] *= 1 / c[l] - - ll = np.sum(np.log10(c)) - - else: - - c = np.ones(m) - - for i in range(n): - F[0, i] = 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]))] - - # Forwards pass - for l in range(1, m): - for i in range(n): - F[l, i] = F[l - 1, i] * (1 - r[l]) + np.sum(F[l - 1, :]) * r_n[l] - F[l, i] *= e[l, np.int64(np.equal(H[l, i], s[0, l]))] - - ll = np.log10(np.sum(F[m - 1, :])) - - return F, c, ll - - -@nb.jit -def backwards_ls_hap(n, m, n_alleles, H, s, e, c, r): - """Matrix based haploid LS backward algorithm using numpy vectorisation.""" - # Initialise - # alleles = check_alleles(alleles, m) - B = np.zeros((m, n)) - for i in range(n): - B[m - 1, i] = 1 - r_n = r / n - - # Backwards pass - for l in range(m - 2, -1, -1): - tmp_B = np.zeros(n) - tmp_B_sum = 0 - for i in range(n): - tmp_B[i] = ( - e[l + 1, np.int64(np.equal(H[l + 1, i], s[0, l + 1]))] * B[l + 1, i] - ) - tmp_B_sum += tmp_B[i] - for i in range(n): - B[l, i] = r_n[l + 1] * tmp_B_sum - B[l, i] += (1 - r[l + 1]) * tmp_B[i] - B[l, i] *= 1 / c[l + 1] - - return B - - -def forwards_ls_hap_wrapper(n, m, alleles, H, s, e, r, norm=True): - n_alleles = check_alleles(alleles, m) - F, c, ll = forwards_ls_hap(n, m, n_alleles, H, s, e, r, norm) - return F, c, ll - - -def backwards_ls_hap_wrapper(n, m, alleles, H, s, e, c, r): - n_alleles = check_alleles(alleles, m) - B = backwards_ls_hap(n, m, n_alleles, H, s, e, c, r) - return B diff --git a/lshmm/vit_diploid_samples_variants.py b/lshmm/vit_diploid_samples_variants.py index fbf542d..4eb849b 100644 --- a/lshmm/vit_diploid_samples_variants.py +++ b/lshmm/vit_diploid_samples_variants.py @@ -38,7 +38,7 @@ def np_argmax(array, axis): return np_apply_along_axis(np.argmax, axis, array) -@nb.jit +@nb.njit def forwards_viterbi_dip_naive(n, m, G, s, e, r): """Naive implementation of LS diploid Viterbi algorithm.""" # Initialise @@ -84,7 +84,7 @@ def forwards_viterbi_dip_naive(n, m, G, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): """Naive implementation of LS diploid Viterbi algorithm, with reduced memory.""" # Initialise @@ -132,7 +132,7 @@ def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): """LS diploid Viterbi algorithm, with reduced memory.""" # Initialise @@ -207,7 +207,7 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r): """Vectorised LS diploid Viterbi algorithm using numpy.""" # Initialise @@ -290,7 +290,7 @@ def forwards_viterbi_dip_naive_full_vec(n, m, G, s, e, r): return V, P, ll -@nb.jit +@nb.njit def backwards_viterbi_dip(m, V_last, P): """Run a backwards pass to determine the most likely path.""" assert V_last.ndim == 2 @@ -311,7 +311,7 @@ def get_phased_path(n, path): return np.unravel_index(path, (n, n)) -@nb.jit +@nb.njit def path_ll_dip(n, m, G, phased_path, s, e, r): """Evaluate log-likelihood path through a reference panel which results in sequence s.""" index = ( diff --git a/lshmm/vit_diploid_variants_samples.py b/lshmm/vit_diploid_variants_samples.py index b2b7afb..9e8ebae 100644 --- a/lshmm/vit_diploid_variants_samples.py +++ b/lshmm/vit_diploid_variants_samples.py @@ -38,49 +38,6 @@ def np_argmax(array, axis): return np_apply_along_axis(np.argmax, axis, array) -# def forwards_viterbi_dip_naive(n, m, G, s, e, r): - -# # Initialise -# V = np.zeros((m, n, n)) -# P = np.zeros((m, n, n)).astype(np.int64) -# c = np.ones(m) -# index = ( -# 4*np.equal(G[0,:,:], s[0,0]).astype(np.int64) + -# 2*(G[0,:,:] == 1).astype(np.int64) + -# np.int64(s[0,0] == 1) -# ) -# V[0,:,:] = 1/(n**2) * e[0,index] -# r_n = r/n - -# for l in range(1,m): -# index = ( -# 4*np.equal(G[l,:,:], s[0,l]).astype(np.int64) + -# 2*(G[l,:,:] == 1).astype(np.int64) + -# np.int64(s[0,l] == 1) -# ) -# for j1 in range(n): -# for j2 in range(n): -# # Get the vector to maximise over -# v = np.zeros((n,n)) -# for k1 in range(n): -# for k2 in range(n): -# v[k1, k2] = V[l-1,k1, k2] -# if ((k1 == j1) and (k2 == j2)): -# v[k1, k2] *= ((1 - r[l])**2 + 2*(1-r[l]) * r_n[l] + r_n[l]**2) -# elif ((k1 == j1) or (k2 == j2)): -# v[k1, k2] *= (r_n[l] * (1 - r[l]) + r_n[l]**2) -# else: -# v[k1, k2] *= r_n[l]**2 -# V[l,j1,j2] = np.amax(v) * e[l, index[j1, j2]] -# P[l,j1,j2] = np.argmax(v) -# c[l] = np.amax(V[l,:,:]) -# V[l,:,:] *= 1/c[l] - -# ll = np.sum(np.log10(c)) - -# return V, P, ll - - @nb.njit def forwards_viterbi_dip_naive(n, m, G, s, e, r): """Naive implementation of LS diploid Viterbi algorithm.""" @@ -130,51 +87,6 @@ def forwards_viterbi_dip_naive(n, m, G, s, e, r): return V, P, ll -# def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): - -# # Initialise -# V = np.zeros((n,n)) -# P = np.zeros((m,n,n)).astype(np.int64) -# c = np.ones(m) -# index = ( -# 4*np.equal(G[0,:,:], s[0,0]).astype(np.int64) + -# 2*(G[0,:,:] == 1).astype(np.int64) + -# np.int64(s[0,0] == 1) -# ) -# V_previous = 1/(n**2) * e[0,index] -# r_n = r/n - -# # Take a look at Haploid Viterbi implementation in Jeromes code and see if we can pinch some ideas. -# # Diploid Viterbi, with smaller memory footprint. -# for l in range(1,m): -# index = ( -# 4*np.equal(G[l,:,:], s[0,l]).astype(np.int64) + -# 2*(G[l,:,:] == 1).astype(np.int64) + -# np.int64(s[0,l] == 1) -# ) -# for j1 in range(n): -# for j2 in range(n): -# # Get the vector to maximise over -# v = np.zeros((n,n)) -# for k1 in range(n): -# for k2 in range(n): -# v[k1, k2] = V_previous[k1, k2] -# if ((k1 == j1) and (k2 == j2)): -# v[k1, k2] *= ((1 - r[l])**2 + 2*(1-r[l]) * r_n[l] + r_n[l]**2) -# elif ((k1 == j1) or (k2 == j2)): -# v[k1, k2] *= (r_n[l] * (1 - r[l]) + r_n[l]**2) -# else: -# v[k1, k2] *= r_n[l]**2 -# V[j1,j2] = np.amax(v) * e[l,index[j1, j2]] -# P[l,j1,j2] = np.argmax(v) -# c[l] = np.amax(V) -# V_previous = np.copy(V) / c[l] - -# ll = np.sum(np.log10(c)) - -# return V, P, ll - - @nb.njit def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): """Naive implementation of LS diploid Viterbi algorithm, with reduced memory.""" @@ -227,78 +139,6 @@ def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): return V, P, ll -# def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): - -# # Initialise -# V = np.zeros((n, n)) -# P = np.zeros((m,n,n)).astype(np.int64) -# index = ( -# 4*np.equal(G[0,:,:], s[0,0]).astype(np.int64) + -# 2*(G[0,:,:] == 1).astype(np.int64) + -# np.int64(s[0,0] == 1) -# ) -# V_previous = 1/(n**2) * e[0,index] -# c = np.ones(m) -# r_n = r/n - -# # Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM. -# for l in range(1,m): - -# index = ( -# 4*np.equal(G[l,:,:], s[0,l]).astype(np.int64) + -# 2*(G[l,:,:] == 1).astype(np.int64) + -# np.int64(s[0,l] == 1) -# ) - -# c[l] = np.amax(V_previous) -# argmax = np.argmax(V_previous) - -# V_previous *= 1/c[l] -# V_rowcol_max = np_amax(V_previous, 0) -# arg_rowcol_max = np_argmax(V_previous, 0) - -# no_switch = (1 - r[l])**2 + 2*(r_n[l]*(1 - r[l])) + r_n[l]**2 -# single_switch = r_n[l]*(1 - r[l]) + r_n[l]**2 -# double_switch = r_n[l]**2 - -# j1_j2 = 0 - -# for j1 in range(n): -# for j2 in range(n): - -# V_single_switch = max(V_rowcol_max[j1], V_rowcol_max[j2]) -# P_single_switch = np.argmax(np.array([V_rowcol_max[j1], V_rowcol_max[j2]])) - -# if P_single_switch == 0: -# template_single_switch = j1*n + arg_rowcol_max[j1] -# else: -# template_single_switch = arg_rowcol_max[j2]*n + j2 - -# V[j1,j2] = V_previous[j1,j2] * no_switch # No switch in either -# P[l, j1, j2] = j1_j2 - -# # Single or double switch? -# single_switch_tmp = single_switch * V_single_switch -# if (single_switch_tmp > double_switch): -# # Then single switch is the alternative -# if (V[j1,j2] < single_switch * V_single_switch): -# V[j1,j2] = single_switch * V_single_switch -# P[l, j1, j2] = template_single_switch -# else: -# # Double switch is the alternative -# if V[j1, j2] < double_switch: -# V[j1, j2] = double_switch -# P[l, j1, j2] = argmax - -# V[j1,j2] *= e[l, index[j1, j2]] -# j1_j2 += 1 -# V_previous = np.copy(V) - -# ll = np.sum(np.log10(c)) + np.log10(np.amax(V)) - -# return V, P, ll - - @nb.njit def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): """LS diploid Viterbi algorithm, with reduced memory.""" @@ -471,47 +311,6 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r): ) -# def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r): - -# # Initialise -# V = np.zeros((m,n,n)) -# P = np.zeros((m,n,n)).astype(np.int64) -# c = np.ones(m) -# index = ( -# 4*np.equal(G[0,:,:], s[0,0]).astype(np.int64) + -# 2*(G[0,:,:] == 1).astype(np.int64) + -# np.int64(s[0,0] == 1) -# ) -# V[0,:,:] = 1/(n**2) * e[0,index] -# r_n = r/n - -# # Jumped the gun - vectorising. -# for l in range(1,m): - -# index = ( -# 4*np.equal(G[l,:,:], s[0,l]).astype(np.int64) + -# 2*(G[l,:,:] == 1).astype(np.int64) + -# np.int64(s[0,l] == 1) -# ) - -# for j1 in range(n): -# for j2 in range(n): -# v = (r_n[l]**2) * np.ones((n,n)) -# v[j1,j2] += (1-r[l])**2 -# v[j1, :] += (r_n[l] * (1 - r[l])) -# v[:, j2] += (r_n[l] * (1 - r[l])) -# v *= V[l-1,:,:] -# V[l,j1,j2] = np.amax(v) * e[l,index[j1, j2]] -# P[l,j1,j2] = np.argmax(v) - -# c[l] = np.amax(V[l,:,:]) -# V[l,:,:] *= 1/c[l] - -# ll = np.sum(np.log10(c)) - -# return V, P, ll - - @nb.njit def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r): """Vectorised LS diploid Viterbi algorithm using numpy.""" diff --git a/lshmm/vit_haploid_samples_variants.py b/lshmm/vit_haploid_samples_variants.py index 7ac4c7f..1793c5d 100644 --- a/lshmm/vit_haploid_samples_variants.py +++ b/lshmm/vit_haploid_samples_variants.py @@ -3,7 +3,7 @@ import numpy as np -@nb.jit +@nb.njit def viterbi_naive_init(n, m, H, s, e, r): """Initialise naive implementation of LS viterbi.""" V = np.zeros((n, m)) @@ -15,7 +15,7 @@ def viterbi_naive_init(n, m, H, s, e, r): return V, P, r_n -@nb.jit +@nb.njit def viterbi_init(n, m, H, s, e, r): """Initialise naive, but more space memory efficient implementation of LS viterbi.""" V_previous = 1 / n * e[np.equal(H[:, 0], s[0, 0]).astype(np.int64), 0] @@ -27,7 +27,7 @@ def viterbi_init(n, m, H, s, e, r): return V, V_previous, P, r_n -@nb.jit +@nb.njit def forwards_viterbi_hap_naive(n, m, H, s, e, r): """Naive implementation of LS haploid Viterbi algorithm.""" # Initialise @@ -51,7 +51,7 @@ def forwards_viterbi_hap_naive(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): """Naive matrix based implementation of LS haploid forward Viterbi algorithm using numpy.""" # Initialise @@ -88,7 +88,7 @@ def forwards_viterbi_hap_naive_full_vec(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): """Naive implementation of LS haploid Viterbi algorithm, with reduced memory.""" # Initialise @@ -113,7 +113,7 @@ def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): """Naive implementation of LS haploid Viterbi algorithm, with reduced memory and rescaling.""" # Initialise @@ -142,7 +142,7 @@ def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): """LS haploid Viterbi algorithm, with reduced memory and exploits the Markov process structure.""" # Initialise @@ -168,7 +168,7 @@ def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): """LS haploid Viterbi algorithm with even smaller memory footprint and exploits the Markov process structure.""" # Initialise @@ -195,7 +195,7 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def backwards_viterbi_hap(m, V_last, P): """Run a backwards pass to determine the most likely path.""" # Initialise @@ -209,7 +209,7 @@ def backwards_viterbi_hap(m, V_last, P): return path -@nb.jit +@nb.njit def path_ll_hap(n, m, H, path, s, e, r): """Evaluate log-likelihood path through a reference panel which results in sequence s.""" index = np.int64(np.equal(H[path[0], 0], s[0, 0])) diff --git a/lshmm/vit_haploid_variants_samples.py b/lshmm/vit_haploid_variants_samples.py index 3e15373..f1d367e 100644 --- a/lshmm/vit_haploid_variants_samples.py +++ b/lshmm/vit_haploid_variants_samples.py @@ -2,24 +2,8 @@ import numba as nb import numpy as np -# Speedier version, variants x samples - -# def viterbi_naive_init(n, m, H, s, e, r): -# ''' -# Initialisation portion of initial naive implementation of LS viterbi to avoid -# lots of code duplication -# ''' - -# V = np.zeros((m,n)) -# P = np.zeros((m,n)).astype(np.int64) -# V[0,:] = 1/n * e[0,np.equal(H[0,:], s[0,0]).astype(np.int64)] -# P[0,:] = 0 # Reminder -# r_n = r/n -# return V, P, r_n - - -@nb.jit +@nb.njit def viterbi_naive_init(n, m, H, s, e, r): """Initialise naive implementation of LS viterbi.""" V = np.zeros((m, n)) @@ -31,22 +15,7 @@ def viterbi_naive_init(n, m, H, s, e, r): return V, P, r_n -# def viterbi_init(n, m, H, s, e, r): -# ''' -# Initialisation portion of initial naive, but more space memory efficient implementation -# of LS viterbi to avoid lots of code duplication -# ''' - -# V_previous = 1/n * e[0,np.equal(H[0,:], s[0,0]).astype(np.int64)] -# V = np.zeros(n) -# P = np.zeros((m,n)).astype(np.int64) -# P[0,:] = 0 # Reminder -# r_n = r/n - -# return V, V_previous, P, r_n - - -@nb.jit +@nb.njit def viterbi_init(n, m, H, s, e, r): """Initialise naive, but more space memory efficient implementation of LS viterbi.""" V_previous = np.zeros(n) @@ -60,34 +29,7 @@ def viterbi_init(n, m, H, s, e, r): return V, V_previous, P, r_n -# def forwards_viterbi_hap_naive(n, m, H, s, e, r): -# ''' -# Simple naive LS forward Viterbi algorithm. -# ''' - -# # Initialise -# V, P, r_n = viterbi_naive_init(n, m, H, s, e, r) - -# for j in range(1,m): -# for i in range(n): -# # Get the vector to maximise over -# v = np.zeros(n) -# for k in range(n): -# # v[k] = e[j,np.equal(H[j,i], s[0,j]).astype(np.int64)] * V[j-1,k] -# v[k] = e[j,np.int64(np.equal(H[j,i], s[0,j]))] * V[j-1,k] -# if k == i: -# v[k] *= 1 - r[j] + r_n[j] -# else: -# v[k] *= r_n[j] -# P[j,i] = np.argmax(v) -# V[j,i] = v[P[j,i]] - -# ll = np.log10(np.amax(V[m-1,:])) - -# return V, P, ll - - -@nb.jit +@nb.njit def forwards_viterbi_hap_naive(n, m, H, s, e, r): """Naive implementation of LS haploid Viterbi algorithm.""" # Initialise @@ -112,30 +54,7 @@ def forwards_viterbi_hap_naive(n, m, H, s, e, r): return V, P, ll -# def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): -# ''' -# Simple matrix based method naive LS forward Viterbi algorithm. Vectorised things -# - I jumped the gun! -# ''' - -# # Initialise -# V, P, r_n = viterbi_naive_init(n, m, H, s, e, r) - -# for j in range(1,m): -# v_tmp = V[j-1,:] * r_n[j] -# for i in range(n): -# v = np.copy(v_tmp) -# v[i] += V[j-1,i] * (1 - r[j]) -# v *= e[j,np.int64(np.equal(H[j,i], s[0,j]))] -# P[j,i] = np.argmax(v) -# V[j,i] = v[P[j,i]] - -# ll = np.log10(np.amax(V[m-1,:])) - -# return V, P, ll - - -@nb.jit +@nb.njit def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): """Naive matrix based implementation of LS haploid forward Viterbi algorithm using numpy.""" # Initialise @@ -155,34 +74,7 @@ def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): return V, P, ll -# def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): -# ''' -# Simple naive LS forward Viterbi algorithm. More memory efficient. -# ''' - -# # Initialise -# V, V_previous, P, r_n = viterbi_init(n, m, H, s, e, r) - -# for j in range(1,m): -# for i in range(n): -# # Get the vector to maximise over -# v = np.zeros(n) -# for k in range(n): -# v[k] = e[j,np.int64(np.equal(H[j,i], s[0,j]))] * V_previous[k] -# if k == i: -# v[k] *= 1 - r[j] + r_n[j] -# else: -# v[k] *= r_n[j] -# P[j,i] = np.argmax(v) -# V[i] = v[P[j,i]] -# V_previous = np.copy(V) - -# ll = np.log10(np.amax(V)) - -# return V, P, ll - - -@nb.jit +@nb.njit def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): """Naive implementation of LS haploid Viterbi algorithm, with reduced memory.""" # Initialise @@ -207,39 +99,7 @@ def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): return V, P, ll -# def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): -# ''' -# Simple naive LS forward Viterbi algorithm. More memory efficient, and with -# a rescaling to avoid underflow problems -# ''' - -# # Initialise -# V, V_previous, P, r_n = viterbi_init(n, m, H, s, e, r) -# c = np.ones(m) - -# for j in range(1,m): -# c[j] = np.amax(V_previous) -# V_previous *= 1/c[j] -# for i in range(n): -# # Get the vector to maximise over -# v = np.zeros(n) -# for k in range(n): -# v[k] = e[j,np.int64(np.equal(H[j,i], s[0,j]))] * V_previous[k] -# if k == i: -# v[k] *= 1 - r[j] + r_n[j] -# else: -# v[k] *= r_n[j] -# P[j,i] = np.argmax(v) -# V[i] = v[P[j,i]] - -# V_previous = np.copy(V) - -# ll = np.sum(np.log10(c)) + np.log10(np.amax(V)) - -# return V, P, ll - - -@nb.jit +@nb.njit def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): """Naive implementation of LS haploid Viterbi algorithm, with reduced memory and rescaling.""" # Initialise @@ -268,36 +128,7 @@ def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): return V, P, ll -# def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): -# ''' -# Simple LS forward Viterbi algorithm. Smaller memory footprint and rescaling, -# and considers the structure of the Markov process. -# ''' - -# # Initialise -# V, V_previous, P, r_n = viterbi_init(n, m, H, s, e, r) -# c = np.ones(m) - -# for j in range(1,m): -# argmax = np.argmax(V_previous) -# c[j] = V_previous[argmax] -# V_previous *= 1/c[j] -# V = np.zeros(n) -# for i in range(n): -# V[i] = V_previous[i] * (1 - r[j] + r_n[j]) -# P[j,i] = i -# if V[i] < r_n[j]: -# V[i] = r_n[j] -# P[j,i] = argmax -# V[i] *= e[j,np.equal(H[j,i], s[0,j]).astype(np.int64)] -# V_previous = np.copy(V) - -# ll = np.sum(np.log10(c)) + np.log10(np.max(V)) - -# return V, P, ll - - -@nb.jit +@nb.njit def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): """LS haploid Viterbi algorithm, with reduced memory and exploits the Markov process structure.""" # Initialise @@ -323,37 +154,7 @@ def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): return V, P, ll -# def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): -# ''' -# Simple LS forward Viterbi algorithm. Even smaller memory footprint and rescaling, -# and considers the structure of the Markov process. -# ''' - -# # Initialise -# V = 1/n * e[0, np.equal(H[0,:], s[0,0]).astype(np.int64)] -# P = np.zeros((m,n)).astype(np.int64) -# # P[0,:] = 0 -# r_n = r/n -# c = np.ones(m) - -# for j in range(1,m): -# argmax = np.argmax(V) -# c[j] = V[argmax] -# V *= 1/c[j] -# for i in range(n): -# V[i] = V[i] * (1 - r[j] + r_n[j]) -# P[j,i] = i -# if V[i] < r_n[j]: -# V[i] = r_n[j] -# P[j,i] = argmax -# V[i] *= e[j,np.int64(np.equal(H[j,i], s[0,j]))] - -# ll = np.sum(np.log10(c)) + np.log10(np.max(V)) - -# return V, P, ll - - -@nb.jit +@nb.njit def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): """LS haploid Viterbi algorithm with even smaller memory footprint and exploits the Markov process structure.""" # Initialise @@ -381,7 +182,7 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): return V, P, ll -@nb.jit +@nb.njit def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): """LS haploid Viterbi algorithm with even smaller memory footprint and exploits the Markov process structure.""" # Initialise @@ -391,8 +192,9 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): r_n = r / n c = np.ones(m) recombs = [ - set() for _ in range(m) + np.zeros(shape=0, dtype=np.int64) for _ in range(m) ] # This is going to be filled with the templates we can recombine to that have higher prob than staying where we are. + V_argmaxes = np.zeros(m) for j in range(1, m): @@ -404,8 +206,8 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): V[i] = V[i] * (1 - r[j] + r_n[j]) if V[i] < r_n[j]: V[i] = r_n[j] - recombs[j].add( - i + recombs[j] = np.append( + recombs[j], i ) # We add template i as a potential template to recombine to at site j. V[i] *= e[j, np.int64(np.equal(H[j, i], s[0, j]))] @@ -416,7 +218,7 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): # Speedier version, variants x samples -@nb.jit +@nb.njit def backwards_viterbi_hap(m, V_last, P): """Run a backwards pass to determine the most likely path.""" # Initialise @@ -430,7 +232,7 @@ def backwards_viterbi_hap(m, V_last, P): return path -@nb.jit +@nb.njit def backwards_viterbi_hap_no_pointer(m, V_argmaxes, recombs): """Run a backwards pass to determine the most likely path.""" # Initialise @@ -446,7 +248,7 @@ def backwards_viterbi_hap_no_pointer(m, V_argmaxes, recombs): return path -@nb.jit +@nb.njit def path_ll_hap(n, m, H, path, s, e, r): """Evaluate log-likelihood path through a reference panel which results in sequence s.""" index = np.int64(np.equal(H[0, path[0]], s[0, 0])) diff --git a/pyproject.toml b/pyproject.toml index 9ea1ed9..2ad0da4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ disable = [ "C", # Do not warn about TODO and FIXME comments "fixme", + "E1136" ] [tool.pytest.ini_options] diff --git a/setup.cfg b/setup.cfg index 95f12c9..c48d56e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = lshmm -version = 0.0.1 +version = 0.0.2 long_description = file: README.md long_description_content_type = text/markdown license = MIT diff --git a/tests/test_API.py b/tests/test_API.py index 82b7a48..c84aec1 100644 --- a/tests/test_API.py +++ b/tests/test_API.py @@ -44,7 +44,7 @@ def genotype_emission(self, mu, m): e = np.zeros((m, 8)) e[:, EQUAL_BOTH_HOM] = (1 - mu) ** 2 e[:, UNEQUAL_BOTH_HOM] = mu ** 2 - e[:, BOTH_HET] = 1 - mu + e[:, BOTH_HET] = (1 - mu) ** 2 + mu ** 2 e[:, REF_HOM_OBS_HET] = 2 * mu * (1 - mu) e[:, REF_HET_OBS_HOM] = mu * (1 - mu) @@ -203,7 +203,6 @@ class FBAlgorithmBase(LSBase): """Base for forwards backwards algorithm tests.""" -# @pytest.mark.skip(reason="DEV: skip for time being") class TestMethodsHap(FBAlgorithmBase): """Test that we compute the sample likelihoods across all implementations.""" @@ -221,7 +220,6 @@ def verify(self, ts): B = ls.backwards(H_vs, s, c, r, mu) -# @pytest.mark.skip(reason="DEV: skip for time being") class TestMethodsDip(FBAlgorithmBase): """Test that we compute the sample likelihoods across all implementations.""" @@ -243,7 +241,6 @@ class VitAlgorithmBase(LSBase): """Base for viterbi algoritm tests.""" -# @pytest.mark.skip(reason="DEV: skip for time being") class TestViterbiHap(VitAlgorithmBase): """Test that we have the same log-likelihood across all implementations""" @@ -260,7 +257,6 @@ def verify(self, ts): self.assertAllClose(path_vs, path) -# @pytest.mark.skip(reason="DEV: skip for time being") class TestViterbiDip(VitAlgorithmBase): """Test that we have the same log-likelihood across all implementations""" diff --git a/tests/test_API_multialllelic.py b/tests/test_API_multiallelic.py similarity index 95% rename from tests/test_API_multialllelic.py rename to tests/test_API_multiallelic.py index c62bed1..a840d1e 100644 --- a/tests/test_API_multialllelic.py +++ b/tests/test_API_multiallelic.py @@ -60,11 +60,6 @@ def example_parameters_haplotypes(self, ts, seed=42, scale_mutation=True): n = H.shape[1] m = ts.get_num_sites() - # alleles = [] - # for variant in ts.variants(): - # alleles.append(variant.alleles) - # n_alleles = np.int8([(len(alleles_site)) for alleles_site in alleles]) - # Must be calculated from the genotype matrix because we can now get back mutations that # result in the number of alleles being higher than the number of alleles in the reference panel. n_alleles = np.int8( @@ -160,7 +155,6 @@ class FBAlgorithmBase(LSBase): """Base for forwards backwards algorithm tests.""" -# @pytest.mark.skip(reason="DEV: skip for time being") class TestMethodsHap(FBAlgorithmBase): """Test that we compute the sample likelihoods across all implementations.""" @@ -194,7 +188,6 @@ class VitAlgorithmBase(LSBase): """Base for viterbi algoritm tests.""" -# @pytest.mark.skip(reason="DEV: skip for time being") class TestViterbiHap(VitAlgorithmBase): """Test that we have the same log-likelihood across all implementations""" diff --git a/tests/test_LS_haploid_diploid.py b/tests/test_LS_haploid_diploid.py index 63ebdc0..34012a4 100644 --- a/tests/test_LS_haploid_diploid.py +++ b/tests/test_LS_haploid_diploid.py @@ -221,7 +221,6 @@ class FBAlgorithmBase(LSBase): """Base for forwards backwards algorithm tests.""" -@pytest.mark.skip(reason="DEV: skip for time being") class TestNonTreeMethodsHap(FBAlgorithmBase): """Test that we compute the sample likelihoods across all implementations.""" @@ -287,7 +286,6 @@ def verify_larger(self, ts): self.assertAllClose(ll_vs, ll_sv) -@pytest.mark.skip(reason="DEV: skip for time being") class TestNonTreeMethodsDip(FBAlgorithmBase): """Test that we compute the sample likelihoods across all implementations.""" @@ -454,12 +452,10 @@ def verify_larger(self, ts): self.assertAllClose(ll_vs, ll_sv) -# @pytest.mark.skip(reason="DEV: skip for time being") class VitAlgorithmBase(LSBase): """Base for viterbi algoritm tests.""" -@pytest.mark.skip(reason="DEV: skip for time being") class TestNonTreeViterbiHap(VitAlgorithmBase): """Test that we have the same log-likelihood across all implementations""" @@ -518,7 +514,9 @@ def verify(self, ts): n, m, H_vs, s, e_vs, r ) path_tmp = vh_vs.backwards_viterbi_hap_no_pointer( - m, V_argmaxes_tmp, recombs + m, + V_argmaxes_tmp, + nb.typed.List(recombs), ) ll_check = vh_vs.path_ll_hap(n, m, H_vs, path_tmp, s, e_vs, r) self.assertAllClose(ll_tmp, ll_check) @@ -622,7 +620,9 @@ def verify_larger(self, ts): ) = vh_vs.forwards_viterbi_hap_lower_mem_rescaling_no_pointer( n, m, H_vs, s, e_vs, r ) - path_tmp = vh_vs.backwards_viterbi_hap_no_pointer(m, V_argmaxes_tmp, recombs) + path_tmp = vh_vs.backwards_viterbi_hap_no_pointer( + m, V_argmaxes_tmp, nb.typed.List(recombs) + ) ll_check = vh_vs.path_ll_hap(n, m, H_vs, path_tmp, s, e_vs, r) self.assertAllClose(ll_tmp, ll_check) self.assertAllClose(ll_vs, ll_tmp) @@ -672,7 +672,6 @@ def verify_larger(self, ts): self.assertAllClose(ll_vs, ll_sv) -# @pytest.mark.skip(reason="DEV: skip for time being") class TestNonTreeViterbiDip(VitAlgorithmBase): """Test that we have the same log-likelihood across all implementations""" diff --git a/tests/test_LS_haploid_multiallelic.py b/tests/test_LS_haploid_multiallelic.py deleted file mode 100644 index d78e4cd..0000000 --- a/tests/test_LS_haploid_multiallelic.py +++ /dev/null @@ -1,170 +0,0 @@ -# Simulation -import itertools - -# Python libraries -import msprime -import numpy as np -import pytest -import tskit - -import lshmm.forward_backward.fb_haploid_variants_samples_multiallelic as fbh -import lshmm.vit_haploid_variants_samples_multiallelic as vh - - -class LSBase: - """Superclass of Li and Stephens tests.""" - - def example_haplotypes(self, ts): - - H = ts.genotype_matrix() - s = H[:, 0].reshape(1, H.shape[0]) - H = H[:, 1:] - - return H, s - - def haplotype_emission(self, mu, m): - # Define the emission probability matrix - e = np.zeros((m, 2)) - e[:, 0] = mu - e[:, 1] = 1 - mu - - return e - - def example_parameters_haplotypes(self, ts, seed=42): - """Returns an iterator over combinations of haplotype, recombination and mutation rates.""" - np.random.seed(seed) - H, s = self.example_haplotypes(ts) - n = H.shape[1] - m = ts.get_num_sites() - - alleles = [] - for variant in ts.variants(): - alleles.append(variant.alleles) - - # Here we have equal mutation and recombination - r = np.zeros(m) + 0.01 - mu = np.zeros(m) + 0.01 - r[0] = 0 - - e = self.haplotype_emission(mu, m) - - yield n, m, alleles, H, s, e, r - - # Mixture of random and extremes - rs = [np.zeros(m) + 0.999, np.zeros(m) + 1e-6, np.random.rand(m)] - - mus = [np.zeros(m) + 0.33, np.zeros(m) + 1e-6, np.random.rand(m) * 0.33] - - e = self.haplotype_emission(mu, m) - - for r, mu in itertools.product(rs, mus): - r[0] = 0 - e = self.haplotype_emission(mu, m) - yield n, m, alleles, H, s, e, r - - def assertAllClose(self, A, B): - """Assert that all entries of two matrices are 'close'""" - assert np.allclose(A, B, rtol=1e-9, atol=0.0) - - # Define a bunch of very small tree-sequences for testing a collection of parameters on - def test_simple_n_10_no_recombination(self): - ts = msprime.sim_ancestry( - samples=10, - recombination_rate=0, - random_seed=42, - sequence_length=10, - population_size=10000, - ) - ts = msprime.sim_mutations(ts, rate=1e-5, random_seed=42) - assert ts.num_sites > 3 - self.verify(ts) - - def test_simple_n_6(self): - ts = msprime.sim_ancestry( - samples=6, - recombination_rate=1e-4, - random_seed=42, - sequence_length=40, - population_size=10000, - ) - ts = msprime.sim_mutations(ts, rate=1e-3, random_seed=42) - assert ts.num_sites > 5 - self.verify(ts) - - def test_simple_n_8(self): - ts = msprime.sim_ancestry( - samples=8, - recombination_rate=1e-4, - random_seed=42, - sequence_length=20, - population_size=10000, - ) - ts = msprime.sim_mutations(ts, rate=1e-4, random_seed=42) - assert ts.num_sites > 5 - assert ts.num_trees > 15 - self.verify(ts) - - def test_simple_n_16(self): - ts = msprime.sim_ancestry( - samples=16, - recombination_rate=1e-2, - random_seed=42, - sequence_length=20, - population_size=10000, - ) - ts = msprime.sim_mutations(ts, rate=1e-4, random_seed=42) - assert ts.num_sites > 5 - print(ts.num_trees) - self.verify(ts) - - def verify(self, ts): - raise NotImplementedError() - - -class FBAlgorithmBase(LSBase): - """Base for forwards backwards algorithm tests.""" - - -# @pytest.mark.skip(reason="DEV: skip for time being") -class TestNonTreeMethodsHap(FBAlgorithmBase): - """Test that we compute the sample likelihoods across all implementations.""" - - def verify(self, ts): - for n, m, alleles, H, s, e, r in self.example_parameters_haplotypes(ts): - F, c, ll = fbh.forwards_ls_hap_wrapper( - n, m, alleles, H, s, e, r, norm=False - ) - B = fbh.backwards_ls_hap_wrapper(n, m, alleles, H, s, e, c, r) - self.assertAllClose(np.log10(np.sum(F * B, 1)), ll * np.ones(m)) - F_tmp, c_tmp, ll_tmp = fbh.forwards_ls_hap_wrapper( - n, m, alleles, H, s, e, r, norm=True - ) - B_tmp = fbh.backwards_ls_hap_wrapper(n, m, alleles, H, s, e, c_tmp, r) - self.assertAllClose(ll, ll_tmp) - self.assertAllClose(np.sum(F_tmp * B_tmp, 1), np.ones(m)) - - -@pytest.mark.skip(reason="DEV: skip for time being") -class VitAlgorithmBase(LSBase): - """Base for viterbi algoritm tests.""" - - -@pytest.mark.skip(reason="DEV: skip for time being") -class TestNonTreeViterbiHap(VitAlgorithmBase): - """Test that we have the same log-likelihood across all implementations""" - - def verify(self, ts): - for n, m, alleles, H, s, e, r in self.example_parameters_haplotypes(ts): - - V, P, ll = vh.forwards_viterbi_hap_naive_wrapper(n, m, H, s, e, r) - path = vh.backwards_viterbi_hap_wrapper(m, V[m - 1, :], P) - ll_check = vh.path_ll_hap(n, m, H, path, s, e, r) - self.assertAllClose(ll, ll_check) - - V_tmp, P_tmp, ll_tmp = vh.forwards_viterbi_hap_lower_mem_rescaling_wrapper( - n, m, H, s, e, r - ) - path_tmp = vh.backwards_viterbi_hap_wrapper(m, V_tmp, P_tmp) - ll_check = vh.path_ll_hap(n, m, H, path_tmp, s, e, r) - self.assertAllClose(ll_tmp, ll_check) - self.assertAllClose(ll, ll_tmp)