
Commit

Merge remote-tracking branch 'origin/develop' into andrei_develop
abombin committed Aug 27, 2024
2 parents f849233 + 93ec13f commit f0d1320
Showing 3 changed files with 25 additions and 2 deletions.
Empty file modified app_images/logo2c.png
100755 → 100644
23 changes: 23 additions & 0 deletions basic_phenotyper_lib.py
@@ -77,6 +77,29 @@ def init_pheno_cols(df, marker_names, marker_col_prefix):
# This was previously really slow. Code basically taken from new_phenotyping_lib.py
marker_cols_first_row = df_markers.iloc[0, :].to_list() # get just the first row of marker values
if (0 not in marker_cols_first_row) and (1 not in marker_cols_first_row):

# Null values in df_markers will break the .map() step so check for and remove them here
ser_num_of_null_rows_in_each_column = df_markers.isnull().sum()
if ser_num_of_null_rows_in_each_column.sum() != 0:

# Import Streamlit locally so the warning below can be rendered. This module otherwise does not import Streamlit and should stay that way; a local import is a minimal fix for the time being
import streamlit as st

st.warning('Null values have been detected in the phenotype columns. Next time, please check for and remove null rows in the datafile unification step (File Handling > Datafile Unification). We are removing them for you now. Here are the numbers of null rows found in each column containing them:')
ser_num_of_null_rows_in_each_column.name = 'Number of null rows'
st.write(ser_num_of_null_rows_in_each_column[ser_num_of_null_rows_in_each_column != 0])

# Perform the operation
row_count_before = len(df)
df = df.dropna(subset=marker_cols)
row_count_after = len(df)

# Display a success message
st.write(f'{row_count_before - row_count_after} rows deleted')

# Update df_markers
df_markers = df[marker_cols]

df_markers = df_markers.map(lambda x: {'+': '1', '-': '0'}[x[-1]])
df['mark_bits'] = df_markers.astype(str).apply(''.join, axis='columns') # efficiently create a series of strings that are the columns (in string format) concatenated together

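For illustration, here is a minimal, self-contained sketch of what the added block does. The marker column names and values below are hypothetical, and `DataFrame.map` requires pandas >= 2.1 (older versions use `.applymap`):

```python
import pandas as pd

# Hypothetical marker columns and values; the real names come from marker_names/marker_col_prefix
marker_cols = ['Phenotype CD3', 'Phenotype CD8']
df = pd.DataFrame({
    'Phenotype CD3': ['CD3+', 'CD3-', None, 'CD3+'],
    'Phenotype CD8': ['CD8-', 'CD8+', 'CD8-', None],
})
df_markers = df[marker_cols]

# Count null rows per marker column, mirroring the check added to init_pheno_cols()
ser_num_of_null_rows_in_each_column = df_markers.isnull().sum()
if ser_num_of_null_rows_in_each_column.sum() != 0:
    df = df.dropna(subset=marker_cols)  # drop rows with a null in any marker column
    df_markers = df[marker_cols]

# Map the trailing '+'/'-' of each marker value to '1'/'0'
df_markers = df_markers.map(lambda x: {'+': '1', '-': '0'}[x[-1]])

# Concatenate the per-marker bits into one string per cell
df['mark_bits'] = df_markers.astype(str).apply(''.join, axis='columns')
print(df['mark_bits'].tolist())  # ['10', '01'] -- the two null-containing rows were dropped
```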
4 changes: 2 additions & 2 deletions pages2/datafile_format_unifier.py
@@ -229,9 +229,9 @@ def main():
if ser_num_of_null_rows_in_each_column.sum() == 0:
st.success('No null rows detected in the dataset.')
else:
- st.write('Null values have been detected. Here are the numbers of null rows found in each column. Note they may not matter depending on the column:')
+ st.write('Null values have been detected. Here are the numbers of null rows found in the columns containing them. Note they may not matter depending on the column:')
  ser_num_of_null_rows_in_each_column.name = 'Number of null rows'
- st.write(ser_num_of_null_rows_in_each_column)
+ st.write(ser_num_of_null_rows_in_each_column[ser_num_of_null_rows_in_each_column != 0])

# Create an expander for the null row deletion section
with st.expander('Click to expand:', expanded=False):
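For illustration, a minimal sketch of the revised reporting logic: only the columns that actually contain null rows are shown. The DataFrame and column names here are hypothetical, and `print` stands in for `st.write`:

```python
import pandas as pd

# Hypothetical unified datafile; column names are made up for this sketch
df = pd.DataFrame({
    'Slide ID': ['A', 'A', 'B'],
    'Cell X Position': [1.0, None, 3.0],
    'Phenotype CD3': ['CD3+', 'CD3-', None],
})

# Count the null rows in every column
ser_num_of_null_rows_in_each_column = df.isnull().sum()
ser_num_of_null_rows_in_each_column.name = 'Number of null rows'

# Report only the columns that actually contain nulls (print stands in for st.write here)
print(ser_num_of_null_rows_in_each_column[ser_num_of_null_rows_in_each_column != 0])
```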
