Skip to content

Commit

Permalink
Merge pull request #62 from astrochun/47_search_tool
Browse files Browse the repository at this point in the history
Add individual search page; Improvements to Highest Earners data view
  • Loading branch information
astrochun authored Jul 18, 2021
2 parents 2b26e0e + 0f4fde7 commit b01d2bf
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 48 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Changelog

## [v1.0.0](https://github.com/astrochun/uarizona-salary-app/tree/v1.0.0) (2021-07-18)

**Implemented enhancements:**
- Add individual search page; Improvements to Highest Earners data view
[#62](https://github.com/astrochun/uarizona-salary-app/pull/62)

**Fixed bugs:**
- Limit Highest Earners [#56](https://github.com/astrochun/uarizona-salary-app/issues/56)

**Closed issues:**
- Add individual search page [#47](https://github.com/astrochun/uarizona-salary-app/issues/47)
- Format Highest Earners page for currency, FTE and State Fund Ratio
[#61](https://github.com/astrochun/uarizona-salary-app/issues/61)


## [v0.3.0](https://github.com/astrochun/uarizona-salary-app/tree/v0.3.0) (2021-07-16)

**Implemented enhancements:**
Expand Down
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Deployed as Heroku at: https://sapp4ua.herokuapp.com

**TL;DR:**<br>
_This is a website providing public salary data for the University of
Arizona. It is a "Choose Your Own Data Science" (CYODS) tool, so just
Arizona. It is a "Choose Your Own Data Science" tool, so just
explore with different "data views" on the sidebar!_

**More information:**<br>
Expand Down Expand Up @@ -35,12 +35,13 @@ this application's resources.

You can begin your data journey by selecting a "data view" on the sidebar:

1. Trends 🆕 : General facts and numbers (e.g. number of employees,
1. **Individual Search 🆕 : Find all salary data for individual(s)**
2. Trends: General facts and numbers (e.g. number of employees,
salary budget, etc.), for each fiscal year
2. Salary Summary: Statistics and percentile salary data, includes salary histogram
3. Highest Earners: Extract data above a minimum salary
4. College/Division Data: Similar to Salary Summary but extracted for each college(s)/division(s)
5. Department Data: Similar to Salary Summary but extracted for each department(s)
3. Salary Summary: Statistics and percentile salary data, includes salary histogram
4. Highest Earners (Updated): Extract data above a minimum salary. Now you can select a given college/division
5. College/Division Data: Similar to Salary Summary but extracted for each college(s)/division(s)
6. Department Data: Similar to Salary Summary but extracted for each department(s)

Enjoy!<br>
&#8208; Chun 🌵
Expand Down
2 changes: 1 addition & 1 deletion salary_app/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.3.0'
__version__ = '1.0.0'
14 changes: 13 additions & 1 deletion salary_app/commons.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
import streamlit as st

from constants import SALARY_COLUMN, COLLEGE_NAME
from constants import SALARY_COLUMN, EMPLOYMENT_COLUMN, COLLEGE_NAME


def get_summary_data(df: pd.DataFrame, pd_loc_dict: dict, style: str,
Expand Down Expand Up @@ -67,3 +67,15 @@ def show_percentile_data(series_list: list):
for col in ['mean', 'std', 'min', '25%', '50%', '75%', 'max']:
fmt_dict[col] = "${:,.2f}"
st.write(summary_df.style.format(fmt_dict))


def format_salary_df(df: pd.DataFrame):
    """Write a dataframe to the page with salary and ratio columns formatted

    The two annual-salary columns are shown as dollar amounts with
    thousands separators; 'FTE' and 'State Fund Ratio' are shown with
    two decimal places.

    :param df: DataFrame to display
    """

    currency_fmt = "${:,.2f}"
    ratio_fmt = "{:.2f}"

    # Per-column format strings handed to the pandas Styler
    fmt_dict = dict.fromkeys([SALARY_COLUMN, EMPLOYMENT_COLUMN], currency_fmt)
    fmt_dict.update({'FTE': ratio_fmt, 'State Fund Ratio': ratio_fmt})

    styled_df = df.style.format(fmt_dict)
    st.write(styled_df)
12 changes: 10 additions & 2 deletions salary_app/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

# Commonly used DataFrame column names
SALARY_COLUMN = 'Annual Salary at Full FTE'
EMPLOYMENT_COLUMN = 'Annual Salary at Employment FTE'

STR_N_EMPLOYEES = 'Number of Employees'
COLLEGE_NAME = 'College Name'

Expand All @@ -30,9 +32,15 @@
}

DATA_VIEWS = [
'About', 'Trends (NEW)', 'Salary Summary', 'Highest Earners',
'College/Division Data', 'Department Data'
'About', 'Individual Search (NEW)', 'Trends', 'Salary Summary',
'Highest Earners', 'College/Division Data', 'Department Data',
]

# This is for the Trends page
TRENDS_LIST = ['General', 'Income Bracket']

# This is for Individual Search page
# Columns shown for each record, in display order. The salary and college
# columns reuse the shared constants above so a rename happens in one place.
INDIVIDUAL_COLUMNS = [
    SALARY_COLUMN, '%', 'FTE', EMPLOYMENT_COLUMN,
    'Primary Title', 'Department', COLLEGE_NAME, 'State Fund Ratio',
]
44 changes: 34 additions & 10 deletions salary_app/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
import argparse

import streamlit as st
from streamlit.components.v1 import html
Expand All @@ -11,7 +12,12 @@


@st.cache
def load_data():
def load_data(local: str = ''):
"""Load data"""
if local:
print("Loading data from local source")
else:
print("Loading data from Google Drive")

file_id = {
'FY2019-20': '1d2l29_T-mOh05bglPlwAFlzeV1PIkRXd',
Expand All @@ -25,11 +31,21 @@ def load_data():

data_dict = {}
for year in FY_LIST:
data_dict[year.split(' ')[0]] = pd.read_csv(
f'https://drive.google.com/uc?id={file_id[year.split(" ")[0]]}'
)
year_split = year.split(' ')[0]
if not local:
url = f'https://drive.google.com/uc?id={file_id[year_split]}'
else:
url = f'{local}/{year_split}_clean.csv'
data_dict[year_split] = pd.read_csv(url)

# Get unique.csv
if not local:
unique_url = 'https://drive.google.com/uc?id=1-2aFLO1nbPWT02y8N8Suue0Gg2FisWub'
else:
unique_url = f'{local}/unique.csv'
unique_df = pd.read_csv(unique_url)

return data_dict
return data_dict, unique_df


@st.cache
Expand All @@ -42,7 +58,7 @@ def header_buttons() -> str:
return buttons_html


def main(bokeh=True):
def main(bokeh=True, local: str = ''):
st.set_page_config(page_title=f'{TITLE} - sapp4ua', layout='wide',
initial_sidebar_state='auto')

Expand Down Expand Up @@ -83,7 +99,7 @@ def main(bokeh=True):
)

# Load data
data_dict = load_data()
data_dict, unique_df = load_data(local=local)

# Sidebar, select data view
view_select = sidebar.select_data_view()
Expand All @@ -92,7 +108,7 @@ def main(bokeh=True):

# Sidebar FY selection
fy_select = ''
if view_select not in ['About', 'Trends']:
if view_select not in ['About', 'Trends', 'Individual Search']:
fy_select = sidebar.select_fiscal_year()

# Select dataframe
Expand All @@ -101,7 +117,7 @@ def main(bokeh=True):

# Select pay rate conversion
pay_norm = 1 # Default: Annual = 1.0
if view_select not in ['About', 'Highest Earners']:
if view_select not in ['About', 'Highest Earners', 'Individual Search']:
pay_norm = sidebar.select_pay_conversion(
fy_select, pay_norm, view_select
)
Expand All @@ -128,6 +144,14 @@ def main(bokeh=True):
views.subset_select_data_page(df, 'Department', 'department',
pay_norm, bokeh=bokeh)

if view_select == 'Individual Search':
views.individual_search_page(data_dict, unique_df)


if __name__ == '__main__':
main(bokeh=True)

parser = argparse.ArgumentParser("Streamlit script")
parser.add_argument('--local', default='', help='Local path to specify')
args = parser.parse_args()

main(bokeh=True, local=args.local)
31 changes: 24 additions & 7 deletions salary_app/sidebar.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import streamlit as st

from constants import DATA_VIEWS, FY_LIST, PAY_CONVERSION, FISCAL_HOURS, \
TRENDS_LIST, SALARY_COLUMN
TRENDS_LIST, SALARY_COLUMN, COLLEGE_NAME


def select_data_view() -> str:
Expand Down Expand Up @@ -51,16 +51,33 @@ def select_trends() -> str:
return trends_select


def select_minimum_salary(df, step):
def select_minimum_salary(df, step, college_select: str = ''):
"""Sidebar widget to select minimum salary for Highest Earners page"""

st.sidebar.markdown('### Enter minimum FTE salary:')
sal_describe = df[SALARY_COLUMN].describe()
min_salary = st.sidebar.number_input('',
min_value=int(sal_describe['min']),
max_value=int(sal_describe['max']),
value=500000,
step=step)

number_input_settings = {
'min_value': 100000,
'max_value': int(sal_describe['max']),
'value': 500000,
'step': step
}

if college_select:
t_df = df.loc[df[COLLEGE_NAME] == college_select]
sal_describe = t_df[SALARY_COLUMN].describe()
max_value = int(sal_describe['max'])
number_input_settings['max_value'] = max_value

if max_value > 100000:
number_input_settings['min_value'] = 75000
number_input_settings['value'] = 100000
else:
number_input_settings['min_value'] = 65000
number_input_settings['value'] = 75000

min_salary = st.sidebar.number_input('', **number_input_settings)

return min_salary

Expand Down
Loading

0 comments on commit b01d2bf

Please sign in to comment.