Skip to content

Commit

Permalink
Added chunking of allele_mapping array so that da.map_blocks works
Browse files Browse the repository at this point in the history
Removed numba decorator for the apply_allele_mapping function, for now
  • Loading branch information
Tristan Dennis authored and Tristan Dennis committed Aug 8, 2024
1 parent ab6f28c commit 5e502bf
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
24 changes: 16 additions & 8 deletions malariagen_data/anoph/snp_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1627,15 +1627,23 @@ def biallelic_snp_calls(
# Store alleles, transformed.
variant_allele = ds_bi["variant_allele"].data
variant_allele = variant_allele.rechunk((variant_allele.chunks[0], -1))
# variant_allele_out = da.map_blocks(
# lambda block: apply_allele_mapping(block, allele_mapping, max_allele=1),
# variant_allele,
# dtype=variant_allele.dtype,
# chunks=(variant_allele.chunks[0], [2]),
# )
variant_allele_out = apply_allele_mapping(
variant_allele.compute(), allele_mapping, max_allele=1

# Chunk the allele mapping using the same chunks as variant_allele.
allele_mapping_chunked = da.from_array(
allele_mapping, chunks=variant_allele.chunks
)

# Apply allele mapping blockwise to variant_allele.
variant_allele_out = da.map_blocks(
lambda allele, map: apply_allele_mapping(allele, map, max_allele=1),
variant_allele,
allele_mapping_chunked,
dtype=variant_allele.dtype,
chunks=(variant_allele.chunks[0], [2]),
)
# variant_allele_out = apply_allele_mapping(
# variant_allele.compute(), allele_mapping, max_allele=1
# )
data_vars["variant_allele"] = ("variants", "alleles"), variant_allele_out

# Store allele counts, transformed, so we don't have to recompute.
Expand Down
6 changes: 2 additions & 4 deletions malariagen_data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ def trim_alleles(ac):
return mapping


@numba.njit
# @numba.njit
def apply_allele_mapping(x, mapping, max_allele):
"""Transform an array x, where the columns correspond to alleles,
according to an allele mapping.
Expand All @@ -1360,9 +1360,7 @@ def apply_allele_mapping(x, mapping, max_allele):
n_sites = x.shape[0]
n_alleles = x.shape[1]
assert mapping.shape[0] == n_sites
assert (
mapping.shape[1] == n_alleles
) # these are not the same, work out what's going on - try running code with debugger? or print statements
assert mapping.shape[1] == n_alleles

# Create output array.
out = np.empty(shape=(n_sites, max_allele + 1), dtype=x.dtype)
Expand Down

0 comments on commit 5e502bf

Please sign in to comment.