From bbfbe4f378bab08f25c4aaddad11965681a6ae2d Mon Sep 17 00:00:00 2001 From: chris Date: Thu, 7 Oct 2021 17:56:04 +0100 Subject: [PATCH 1/3] fixes pandas fragmentation warning --- malariagen_data/ag3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index fa4448407..8a129e40d 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -1739,6 +1739,7 @@ def gene_cnv_frequencies( # compute cohort frequencies for coh, loc_samples in coh_dict.items(): + df = df.copy() n_samples = np.count_nonzero(loc_samples) if n_samples == 0: raise ValueError(f"no samples for cohort {coh!r}") @@ -1756,7 +1757,7 @@ def gene_cnv_frequencies( df[f"{coh}_del"] = del_freq_coh # set gene ID as index for convenience - df.set_index("ID", inplace=True) + df = df.set_index("ID") return df From 15cc169c9c652cdd45add79ede0764cfe56dd7bc Mon Sep 17 00:00:00 2001 From: chris Date: Wed, 13 Oct 2021 20:41:28 +0100 Subject: [PATCH 2/3] fixes pandas warnings using dict --- malariagen_data/ag3.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index 8a129e40d..803a7b4d9 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -1738,14 +1738,14 @@ def gene_cnv_frequencies( ) # compute cohort frequencies + freq_cols = dict() for coh, loc_samples in coh_dict.items(): - df = df.copy() n_samples = np.count_nonzero(loc_samples) if n_samples == 0: raise ValueError(f"no samples for cohort {coh!r}") if n_samples < min_cohort_size: - df[f"{coh}_amp"] = np.nan - df[f"{coh}_del"] = np.nan + freq_cols[f"{coh}_amp"] = np.nan + freq_cols[f"{coh}_del"] = np.nan else: is_amp_coh = np.compress(loc_samples, is_amp, axis=1) is_del_coh = np.compress(loc_samples, is_del, axis=1) @@ -1753,8 +1753,15 @@ def gene_cnv_frequencies( del_count_coh = np.sum(is_del_coh, axis=1) amp_freq_coh = amp_count_coh / n_samples del_freq_coh = del_count_coh / n_samples - 
df[f"{coh}_amp"] = amp_freq_coh - df[f"{coh}_del"] = del_freq_coh + freq_cols[f"{coh}_amp"] = amp_freq_coh + freq_cols[f"{coh}_del"] = del_freq_coh + + # build a dataframe with the frequency columns + df_freqs = pandas.DataFrame.from_dict(freq_cols) + + # build the final dataframe + df = df.reset_index(drop=True) + df = pandas.concat([df, df_freqs], axis=1) # set gene ID as index for convenience df = df.set_index("ID") From ca1e6012f5e870731399084a885b76b1f7841412 Mon Sep 17 00:00:00 2001 From: chris Date: Thu, 14 Oct 2021 10:53:42 +0100 Subject: [PATCH 3/3] Alistair's inplace suggestions --- malariagen_data/ag3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index 803a7b4d9..b849d7dd9 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -1757,14 +1757,14 @@ def gene_cnv_frequencies( freq_cols[f"{coh}_del"] = del_freq_coh # build a dataframe with the frequency columns - df_freqs = pandas.DataFrame.from_dict(freq_cols) + df_freqs = pandas.DataFrame(freq_cols) # build the final dataframe - df = df.reset_index(drop=True) + df.reset_index(drop=True, inplace=True) df = pandas.concat([df, df_freqs], axis=1) # set gene ID as index for convenience - df = df.set_index("ID") + df.set_index("ID", inplace=True) return df