
Commit

Merge remote-tracking branch 'origin/develop' into andrei_develop
abombin committed Aug 27, 2024
2 parents f849233 + 93ec13f commit f0d1320
Showing 3 changed files with 25 additions and 2 deletions.
Empty file modified app_images/logo2c.png
100755 → 100644
23 changes: 23 additions & 0 deletions basic_phenotyper_lib.py
@@ -77,6 +77,29 @@ def init_pheno_cols(df, marker_names, marker_col_prefix):
# This was previously really slow. Code basically taken from new_phenotyping_lib.py
marker_cols_first_row = df_markers.iloc[0, :].to_list() # get just the first row of marker values
if (0 not in marker_cols_first_row) and (1 not in marker_cols_first_row):

# Null values in df_markers will break the .map() step so check for and remove them here
ser_num_of_null_rows_in_each_column = df_markers.isnull().sum()
if ser_num_of_null_rows_in_each_column.sum() != 0:

# Import Streamlit locally so the warning below can be rendered. This module otherwise does not import Streamlit and should stay that way; a local import is a minimal fix for the time being
import streamlit as st

st.warning('Null values have been detected in the phenotype columns. Next time, please check for and remove null rows in the datafile unification step (File Handling > Datafile Unification). We are removing them for you now. Here are the numbers of null rows found in each column containing them:')
ser_num_of_null_rows_in_each_column.name = 'Number of null rows'
st.write(ser_num_of_null_rows_in_each_column[ser_num_of_null_rows_in_each_column != 0])

# Perform the operation
row_count_before = len(df)
df = df.dropna(subset=marker_cols)
row_count_after = len(df)

# Display a success message
st.write(f'{row_count_before - row_count_after} rows deleted')

# Update df_markers
df_markers = df[marker_cols]

df_markers = df_markers.map(lambda x: {'+': '1', '-': '0'}[x[-1]])
df['mark_bits'] = df_markers.astype(str).apply(''.join, axis='columns') # efficiently create a series of strings that are the columns (in string format) concatenated together

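For illustration, here is a minimal, self-contained sketch of what the added block does. The marker column names and values below are hypothetical, and `DataFrame.map` requires pandas >= 2.1 (older versions use `.applymap`):

```python
import pandas as pd

# Hypothetical marker columns and values; the real names come from marker_names/marker_col_prefix
marker_cols = ['Phenotype CD3', 'Phenotype CD8']
df = pd.DataFrame({
    'Phenotype CD3': ['CD3+', 'CD3-', None, 'CD3+'],
    'Phenotype CD8': ['CD8-', 'CD8+', 'CD8-', None],
})
df_markers = df[marker_cols]

# Count null rows per marker column, mirroring the check added to init_pheno_cols()
ser_num_of_null_rows_in_each_column = df_markers.isnull().sum()
if ser_num_of_null_rows_in_each_column.sum() != 0:
    df = df.dropna(subset=marker_cols)  # drop rows with a null in any marker column
    df_markers = df[marker_cols]

# Map the trailing '+'/'-' of each marker value to '1'/'0'
df_markers = df_markers.map(lambda x: {'+': '1', '-': '0'}[x[-1]])

# Concatenate the per-marker bits into one string per cell
df['mark_bits'] = df_markers.astype(str).apply(''.join, axis='columns')
print(df['mark_bits'].tolist())  # ['10', '01'] -- the two null-containing rows were dropped
```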
4 changes: 2 additions & 2 deletions pages2/datafile_format_unifier.py
@@ -229,9 +229,9 @@ def main():
if ser_num_of_null_rows_in_each_column.sum() == 0:
st.success('No null rows detected in the dataset.')
else:
- st.write('Null values have been detected. Here are the numbers of null rows found in each column. Note they may not matter depending on the column:')
+ st.write('Null values have been detected. Here are the numbers of null rows found in the columns containing them. Note they may not matter depending on the column:')
  ser_num_of_null_rows_in_each_column.name = 'Number of null rows'
- st.write(ser_num_of_null_rows_in_each_column)
+ st.write(ser_num_of_null_rows_in_each_column[ser_num_of_null_rows_in_each_column != 0])

# Create an expander for the null row deletion section
with st.expander('Click to expand:', expanded=False):
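For illustration, a minimal sketch of the revised reporting logic: only the columns that actually contain null rows are shown. The DataFrame and column names here are hypothetical, and `print` stands in for `st.write`:

```python
import pandas as pd

# Hypothetical unified datafile; column names are made up for this sketch
df = pd.DataFrame({
    'Slide ID': ['A', 'A', 'B'],
    'Cell X Position': [1.0, None, 3.0],
    'Phenotype CD3': ['CD3+', 'CD3-', None],
})

# Count the null rows in every column
ser_num_of_null_rows_in_each_column = df.isnull().sum()
ser_num_of_null_rows_in_each_column.name = 'Number of null rows'

# Report only the columns that actually contain nulls (print stands in for st.write here)
print(ser_num_of_null_rows_in_each_column[ser_num_of_null_rows_in_each_column != 0])
```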
