Skip to content

Commit

Permalink
Merge pull request #68 from malariagen/0710-cc-67-panda-problems
Browse files: browse the repository at this point in the history
`gene_cnv_frequencies` - fixes the pandas DataFrame fragmentation warning (PerformanceWarning)
  • Loading branch information
cclarkson authored Oct 14, 2021
2 parents cc33ab2 + ca1e601 commit 9d9eb4c
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions malariagen_data/ag3.py
Original file line number Diff line number Diff line change
Expand Up @@ -1738,22 +1738,30 @@ def gene_cnv_frequencies(
)

# compute cohort frequencies
freq_cols = dict()
for coh, loc_samples in coh_dict.items():
n_samples = np.count_nonzero(loc_samples)
if n_samples == 0:
raise ValueError(f"no samples for cohort {coh!r}")
if n_samples < min_cohort_size:
df[f"{coh}_amp"] = np.nan
df[f"{coh}_del"] = np.nan
freq_cols[f"{coh}_amp"] = np.nan
freq_cols[f"{coh}_del"] = np.nan
else:
is_amp_coh = np.compress(loc_samples, is_amp, axis=1)
is_del_coh = np.compress(loc_samples, is_del, axis=1)
amp_count_coh = np.sum(is_amp_coh, axis=1)
del_count_coh = np.sum(is_del_coh, axis=1)
amp_freq_coh = amp_count_coh / n_samples
del_freq_coh = del_count_coh / n_samples
df[f"{coh}_amp"] = amp_freq_coh
df[f"{coh}_del"] = del_freq_coh
freq_cols[f"{coh}_amp"] = amp_freq_coh
freq_cols[f"{coh}_del"] = del_freq_coh

# build a dataframe with the frequency columns
df_freqs = pandas.DataFrame(freq_cols)

# build the final dataframe
df.reset_index(drop=True, inplace=True)
df = pandas.concat([df, df_freqs], axis=1)

# set gene ID as index for convenience
df.set_index("ID", inplace=True)
Expand Down

0 comments on commit 9d9eb4c

Please sign in to comment.