Skip to content

Commit

Permalink
Fix bug with multiple regions in PCA and snp_allele_counts (#338)
Browse files: browse the repository at this point in the history
* fix bug with multiple regions

* fix exception handling

* upgrade pre-commit and relint
  • Loading branch information
alimanfoo authored Feb 23, 2023
1 parent 0927836 commit 873393f
Show file tree
Hide file tree
Showing 18 changed files with 113 additions and 177 deletions.
15 changes: 10 additions & 5 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
[flake8]
ignore =
E203 # whitespace before ':' - doesn't work well with black
E402 # module level import not at top of file
E501 # line too long - let black worry about that
E731 # do not assign a lambda expression, use a def
W503 # line break before binary operator
# whitespace before ':' - doesn't work well with black
E203
# module level import not at top of file
E402
# line too long - let black worry about that
E501
# do not assign a lambda expression, use a def
E731
# line break before binary operator
W503
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
# https://pre-commit.com/
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.4.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
# isort should run before black as black sometimes tweaks the isort output
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.12.0
hooks:
- id: isort
# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/keewis/blackdoc
rev: v0.3.8
hooks:
- id: blackdoc
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
rev: 6.0.0
hooks:
- id: flake8
2 changes: 0 additions & 2 deletions malariagen_data/af1.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def __init__(
pre=False,
**kwargs, # used by simplecache, init_filesystem(url, **kwargs)
):

super().__init__(
url=url,
config_path=CONFIG_PATH,
Expand Down Expand Up @@ -303,7 +302,6 @@ def genome_features(
return super().genome_features(region=region, attributes=attributes)

def _plot_genes_setup_data(self, *, region):

# Here we override the superclass implementation because the
# gene annotations don't include a "Name" attribute.
#
Expand Down
12 changes: 0 additions & 12 deletions malariagen_data/ag3.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ def __init__(
pre=False,
**kwargs, # used by simplecache, init_filesystem(url, **kwargs)
):

super().__init__(
url=url,
config_path=CONFIG_PATH,
Expand Down Expand Up @@ -513,7 +512,6 @@ def cross_metadata(self):
debug = self._log.debug

if self._cache_cross_metadata is None:

path = f"{self._base_path}/v3/metadata/crosses/crosses.fam"
fam_names = [
"cross",
Expand Down Expand Up @@ -692,7 +690,6 @@ def cnv_hmm(
debug("access CNV HMM data and concatenate as needed")
lx = []
for r in region:

ly = []
for s in sample_sets:
y = self._cnv_hmm_dataset(
Expand Down Expand Up @@ -723,7 +720,6 @@ def cnv_hmm(

debug("handle sample query")
if sample_query is not None:

debug("load sample metadata")
df_samples = self.sample_metadata(sample_sets=sample_sets)

Expand Down Expand Up @@ -914,7 +910,6 @@ def cnv_coverage_calls(
debug("access data and concatenate as needed")
lx = []
for r in region:

debug("obtain coverage calls for the contig")
x = self._cnv_coverage_calls_dataset(
contig=r.contig,
Expand Down Expand Up @@ -1083,7 +1078,6 @@ def cnv_discordant_read_calls(
debug("access data and concatenate as needed")
lx = []
for c in contig:

ly = []
for s in sample_sets:
y = self._cnv_discordant_read_calls_dataset(
Expand Down Expand Up @@ -1189,7 +1183,6 @@ def _gene_cnv(self, *, region, sample_sets, sample_query, max_coverage_variance)
total=len(df_genes),
)
for gene in genes_iterator:

# locate windows overlapping the gene
loc_gene_start = bisect_left(end, gene.start)
loc_gene_stop = bisect_right(pos, gene.end)
Expand Down Expand Up @@ -1403,12 +1396,10 @@ def _gene_cnv_frequencies(
debug("compute cohort frequencies")
freq_cols = dict()
for coh, loc_coh in coh_dict.items():

n_samples = np.count_nonzero(loc_coh)
debug(f"{coh}, {n_samples} samples")

if n_samples >= min_cohort_size:

# subset data to cohort
is_amp_coh = np.compress(loc_coh, is_amp, axis=1)
is_del_coh = np.compress(loc_coh, is_del, axis=1)
Expand Down Expand Up @@ -1626,7 +1617,6 @@ def _gene_cnv_frequencies_advanced(

debug("build event count and nobs for each cohort")
for cohort_index, cohort in enumerate(df_cohorts.itertuples()):

# construct grouping key
cohort_key = cohort.taxon, cohort.area, cohort.period

Expand Down Expand Up @@ -2528,7 +2518,6 @@ def plot_aim_heatmap(

@numba.njit("Tuple((int8, int64))(int8[:], int8)")
def _cn_mode_1d(a, vmax):

# setup intermediates
m = a.shape[0]
counts = np.zeros(vmax + 1, dtype=numba.int64)
Expand Down Expand Up @@ -2556,7 +2545,6 @@ def _cn_mode_1d(a, vmax):

@numba.njit("Tuple((int8[:], int64[:]))(int8[:, :], int8)")
def _cn_mode(a, vmax):

# setup intermediates
n = a.shape[1]

Expand Down
2 changes: 0 additions & 2 deletions malariagen_data/amin1.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

class Amin1:
def __init__(self, url=DEFAULT_URL, **kwargs):

# setup filesystem
self._fs, self._path = init_filesystem(url, **kwargs)

Expand Down Expand Up @@ -159,7 +158,6 @@ def open_snp_calls(self):
return self._cache_snp_genotypes

def _snp_calls_dataset(self, *, region, inline_array, chunks):

assert isinstance(region, Region)
contig = region.contig

Expand Down
Loading

0 comments on commit 873393f

Please sign in to comment.