Merge pull request #62 from astrochun/47_search_tool

Add individual search page; Improvements to Highest Earners data view
astrochun · Jul 18, 2021 · b01d2bf · b01d2bf
2 parents 2b26e0e + 0f4fde7
commit b01d2bf
Show file tree

Hide file tree

Showing 8 changed files with 220 additions and 48 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,20 @@
 # Changelog
 
+## [v1.0.0](https://github.com/astrochun/uarizona-salary-app/tree/v1.0.0) (2021-07-18)
+
+**Implemented enhancements:**
+ - Add individual search page; Improvements to Highest Earners data view
+   [#62](http://github.com/astrochun/uarizona-salary-app/pull/62)
+
+**Fixed bugs:**
+ - Limit Highest Earners [#56](http://github.com/astrochun/uarizona-salary-app/issues/56)
+
+**Closed issues:**
+ - Add individual search page [#47](http://github.com/astrochun/uarizona-salary-app/issues/47)
+ - Format Highest Earner page for currency, FTE and State Fund Ratio
+   [#61](http://github.com/astrochun/uarizona-salary-app/issues/61)
+
+
 ## [v0.3.0](https://github.com/astrochun/uarizona-salary-app/tree/v0.3.0) (2021-07-16)
 
 **Implemented enhancements:**

diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ Deployed as Heroku at: https://sapp4ua.herokuapp.com
 
 **TL;DR:**<br>
 _This is a website providing public salary data for the University of
-Arizona. It is a "Choose Your Own Data Science" (CYODS) tool, so just
+Arizona. It is a "Choose Your Own Data Science" tool, so just
 explore with different "data views" on the sidebar!_
 
 **More information:**<br>
@@ -35,12 +35,13 @@ this application's resources.
 
 You can begin your data journey by selecting a "data view" on the sidebar:
 
- 1. Trends 🆕 : General facts and numbers (e.g. number of employees,
+ 1. **Individual Search 🆕 : Find all salary data for individual(s)**
+ 2. Trends: General facts and numbers (e.g. number of employees,
     salary budget, etc.), for each fiscal year
- 2. Salary Summary: Statistics and percentile salary data, includes salary histogram
- 3. Highest Earners: Extract data above a minimum salary
- 4. College/Division Data: Similar to Salary Summary but extracted for each college(s)/division(s)
- 5. Department Data: Similar to Salary Summary but extracted for each department(s)
+ 3. Salary Summary: Statistics and percentile salary data, includes salary histogram
+ 4. Highest Earners (Updated): Extract data above a minimum salary. Now you can select a given college/division.
+ 5. College/Division Data: Similar to Salary Summary but extracted for each college(s)/division(s)
+ 6. Department Data: Similar to Salary Summary but extracted for each department(s)
 
 Enjoy!<br>
 &#8208; Chun 🌵

diff --git a/salary_app/__init__.py b/salary_app/__init__.py
@@ -1 +1 @@
-__version__ = '0.3.0'
+__version__ = '1.0.0'
diff --git a/salary_app/commons.py b/salary_app/commons.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import streamlit as st
 
-from constants import SALARY_COLUMN, COLLEGE_NAME
+from constants import SALARY_COLUMN, EMPLOYMENT_COLUMN, COLLEGE_NAME
 
 
 def get_summary_data(df: pd.DataFrame, pd_loc_dict: dict, style: str,
@@ -67,3 +67,15 @@ def show_percentile_data(series_list: list):
     for col in ['mean', 'std', 'min', '25%', '50%', '75%', 'max']:
         fmt_dict[col] = "${:,.2f}"
     st.write(summary_df.style.format(fmt_dict))
+
+
+def format_salary_df(df: pd.DataFrame):
+    """Format dataframe style for salary, etc."""
+
+    fmt_dict = {}
+    for col in [SALARY_COLUMN, EMPLOYMENT_COLUMN]:
+        fmt_dict[col] = "${:,.2f}"
+    fmt_dict['FTE'] = "{:.2f}"
+    fmt_dict['State Fund Ratio'] = "{:.2f}"
+
+    st.write(df.style.format(fmt_dict))
diff --git a/salary_app/constants.py b/salary_app/constants.py
@@ -4,6 +4,8 @@
 
 # Commonly used DataFrame column names
 SALARY_COLUMN = 'Annual Salary at Full FTE'
+EMPLOYMENT_COLUMN = 'Annual Salary at Employment FTE'
+
 STR_N_EMPLOYEES = 'Number of Employees'
 COLLEGE_NAME = 'College Name'
 
@@ -30,9 +32,15 @@
 }
 
 DATA_VIEWS = [
-    'About', 'Trends (NEW)', 'Salary Summary', 'Highest Earners',
-    'College/Division Data', 'Department Data'
+    'About', 'Individual Search (NEW)', 'Trends', 'Salary Summary',
+    'Highest Earners', 'College/Division Data', 'Department Data',
 ]
 
 # This is for the Trends page
 TRENDS_LIST = ['General', 'Income Bracket']
+
+# This is for the Individual Search page
+INDIVIDUAL_COLUMNS = [
+    SALARY_COLUMN, '%', 'FTE', 'Annual Salary at Employment FTE',
+    'Primary Title', 'Department', COLLEGE_NAME, 'State Fund Ratio',
+]
diff --git a/salary_app/main.py b/salary_app/main.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import argparse
 
 import streamlit as st
 from streamlit.components.v1 import html
@@ -11,7 +12,12 @@
 
 
 @st.cache
-def load_data():
+def load_data(local: str = ''):
+    """Load data"""
+    if local:
+        print("Loading data from local source")
+    else:
+        print("Loading data from Google Drive")
 
     file_id = {
         'FY2019-20': '1d2l29_T-mOh05bglPlwAFlzeV1PIkRXd',
@@ -25,11 +31,21 @@ def load_data():
 
     data_dict = {}
     for year in FY_LIST:
-        data_dict[year.split(' ')[0]] = pd.read_csv(
-            f'https://drive.google.com/uc?id={file_id[year.split(" ")[0]]}'
-        )
+        year_split = year.split(' ')[0]
+        if not local:
+            url = f'https://drive.google.com/uc?id={file_id[year_split]}'
+        else:
+            url = f'{local}/{year_split}_clean.csv'
+        data_dict[year_split] = pd.read_csv(url)
+
+    # Get unique.csv
+    if not local:
+        unique_url = 'https://drive.google.com/uc?id=1-2aFLO1nbPWT02y8N8Suue0Gg2FisWub'
+    else:
+        unique_url = f'{local}/unique.csv'
+    unique_df = pd.read_csv(unique_url)
 
-    return data_dict
+    return data_dict, unique_df
 
 
 @st.cache
@@ -42,7 +58,7 @@ def header_buttons() -> str:
     return buttons_html
 
 
-def main(bokeh=True):
+def main(bokeh=True, local: str = ''):
     st.set_page_config(page_title=f'{TITLE} - sapp4ua', layout='wide',
                        initial_sidebar_state='auto')
 
@@ -83,7 +99,7 @@ def main(bokeh=True):
     )
 
     # Load data
-    data_dict = load_data()
+    data_dict, unique_df = load_data(local=local)
 
     # Sidebar, select data view
     view_select = sidebar.select_data_view()
@@ -92,7 +108,7 @@ def main(bokeh=True):
 
     # Sidebar FY selection
     fy_select = ''
-    if view_select not in ['About', 'Trends']:
+    if view_select not in ['About', 'Trends', 'Individual Search']:
         fy_select = sidebar.select_fiscal_year()
 
         # Select dataframe
@@ -101,7 +117,7 @@ def main(bokeh=True):
 
     # Select pay rate conversion
     pay_norm = 1  # Default: Annual = 1.0
-    if view_select not in ['About', 'Highest Earners']:
+    if view_select not in ['About', 'Highest Earners', 'Individual Search']:
         pay_norm = sidebar.select_pay_conversion(
             fy_select, pay_norm, view_select
         )
@@ -128,6 +144,14 @@ def main(bokeh=True):
         views.subset_select_data_page(df, 'Department', 'department',
                                       pay_norm, bokeh=bokeh)
 
+    if view_select == 'Individual Search':
+        views.individual_search_page(data_dict, unique_df)
+
 
 if __name__ == '__main__':
-    main(bokeh=True)
+
+    parser = argparse.ArgumentParser("Streamlit script")
+    parser.add_argument('--local', default='', help='Local path to specify')
+    args = parser.parse_args()
+
+    main(bokeh=True, local=args.local)
diff --git a/salary_app/sidebar.py b/salary_app/sidebar.py
@@ -3,7 +3,7 @@
 import streamlit as st
 
 from constants import DATA_VIEWS, FY_LIST, PAY_CONVERSION, FISCAL_HOURS, \
-    TRENDS_LIST, SALARY_COLUMN
+    TRENDS_LIST, SALARY_COLUMN, COLLEGE_NAME
 
 
 def select_data_view() -> str:
@@ -51,16 +51,33 @@ def select_trends() -> str:
     return trends_select
 
 
-def select_minimum_salary(df, step):
+def select_minimum_salary(df, step, college_select: str = ''):
     """Sidebar widget to select minimum salary for Highest Earners page"""
 
     st.sidebar.markdown('### Enter minimum FTE salary:')
     sal_describe = df[SALARY_COLUMN].describe()
-    min_salary = st.sidebar.number_input('',
-                                         min_value=int(sal_describe['min']),
-                                         max_value=int(sal_describe['max']),
-                                         value=500000,
-                                         step=step)
+
+    number_input_settings = {
+        'min_value': 100000,
+        'max_value': int(sal_describe['max']),
+        'value': 500000,
+        'step': step
+    }
+
+    if college_select:
+        t_df = df.loc[df[COLLEGE_NAME] == college_select]
+        sal_describe = t_df[SALARY_COLUMN].describe()
+        max_value = int(sal_describe['max'])
+        number_input_settings['max_value'] = max_value
+
+        if max_value > 100000:
+            number_input_settings['min_value'] = 75000
+            number_input_settings['value'] = 100000
+        else:
+            number_input_settings['min_value'] = 65000
+            number_input_settings['value'] = 75000
+
+    min_salary = st.sidebar.number_input('', **number_input_settings)
 
     return min_salary