Skip to content

Commit

Permalink
Recompute salary data with consistent set of neighborhoods
Browse files Browse the repository at this point in the history
  • Loading branch information
rcalef committed May 5, 2024
1 parent 5145af7 commit 46300a3
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 147 deletions.
120 changes: 9 additions & 111 deletions data/employment_opportunities/extract_employment_opp_info.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,13 @@
#!/usr/bin/env python3
import numpy as np
import pandas as pd

# Hard-coded list of every neighborhood name reported by this script; positions
# in this list are used as indices into the per-neighborhood numpy arrays below.
neighborhood_names = ["Allston/Brighton", "Back Bay/Beacon Hill", "Central Boston", "Charlestown", "Dorchester", "East Boston", "Fenway/Kenmore", "Hyde Park", "Jamaica Plain", "Mattapan", "Roslindale", "Roxbury", "South Boston", "South End", "West Roxbury", "The Port", "Neighborhood Nine", "Wellington-Harrington", "Mid-Cambridge", "North Cambridge", "Cambridge Highlands", "Strawberry Hill", "West Cambridge", "Riverside", "Cambridgeport", "Area 2/MIT", "East Cambridge", "Baldwin"]

# Maps a 5-character zip code string to the neighborhood(s) it belongs to.
# NOTE: value types are mixed -- most zips map to a single name (str), while
# the last entries (02139, 02141, 02138) map to a list of names because several
# neighborhoods share the zip. Code that looks up a value must handle both.
zip_neighborhoods = {
"02134" : "Allston/Brighton",
"02135" : "Allston/Brighton",
"02163" : "Allston/Brighton",
"02108" : "Back Bay/Beacon Hill",
"02116" : "Back Bay/Beacon Hill",
"02117" : "Back Bay/Beacon Hill",
"02123" : "Back Bay/Beacon Hill",
"02133" : "Back Bay/Beacon Hill",
"02199" : "Back Bay/Beacon Hill",
"02216" : "Back Bay/Beacon Hill",
"02217" : "Back Bay/Beacon Hill",
"02295" : "Back Bay/Beacon Hill",
"02101" : "Central Boston",
"02102" : "Central Boston",
"02103" : "Central Boston",
"02104" : "Central Boston",
"02105" : "Central Boston",
"02106" : "Central Boston",
"02107" : "Central Boston",
"02109" : "Central Boston",
"02110" : "Central Boston",
"02111" : "Central Boston",
"02112" : "Central Boston",
"02113" : "Central Boston",
"02114" : "Central Boston",
"02196" : "Central Boston",
"02201" : "Central Boston",
"02202" : "Central Boston",
"02203" : "Central Boston",
"02204" : "Central Boston",
"02205" : "Central Boston",
"02206" : "Central Boston",
"02207" : "Central Boston",
"02208" : "Central Boston",
"02209" : "Central Boston",
"02211" : "Central Boston",
"02212" : "Central Boston",
"02222" : "Central Boston",
"02293" : "Central Boston",
"02129" : "Charlestown",
"02122" : "Dorchester",
"02124" : "Dorchester",
"02125" : "Dorchester",
"02128" : "East Boston",
"02228" : "East Boston",
"02115" : "Fenway/Kenmore",
"02215" : "Fenway/Kenmore",
"02136" : "Hyde Park",
"02130" : "Jamaica Plain",
"02126" : "Mattapan",
"02131" : "Roslindale",
"02119" : "Roxbury",
"02120" : "Roxbury",
"02121" : "Roxbury",
"02127" : "South Boston",
"02210" : "South Boston",
"02118" : "South End",
"02132" : "West Roxbury",
# Zips shared by several neighborhoods: the value is a list, not a str.
"02139" : ["The Port", "Mid-Cambridge", "Riverside", "Cambridgeport", "Area 2/MIT"],
"02140" : "Neighborhood Nine",
"02141" : ["Wellington-Harrington", "North Cambridge", "East Cambridge"],
"02138" : ["Cambridge Highlands", "Strawberry Hill", "West Cambridge", "Baldwin"]
}

# Groups of neighborhoods that share a zip code. These three lists repeat the
# list values of zips 02139, 02141 and 02138 above and must be kept in sync
# with them by hand.
overlapping_neighs = [["The Port", "Mid-Cambridge", "Riverside", "Cambridgeport", "Area 2/MIT"], ["Wellington-Harrington", "North Cambridge", "East Cambridge"], ["Cambridge Highlands", "Strawberry Hill", "West Cambridge", "Baldwin"]]
# Load the shared zip -> neighborhoods table (tab-separated, columns `zip` and
# `neighborhoods`, the latter a comma-separated list of names).
# The zip column is read as text and left-padded to five characters: parsed as
# an integer it loses its leading zero, and blindly prepending a single "0"
# is only correct for zips that happen to have exactly four significant digits.
zip_map = pd.read_table(
    "../transportation/mbta/zips_to_hoods.tsv",
    dtype={"zip": str},
)

# Every value is a list of neighborhood names, even when a zip has only one.
zip_neighborhoods = {
    row.zip.zfill(5): row.neighborhoods.split(",")
    for row in zip_map.itertuples()
}

# Sorted, de-duplicated list of every neighborhood named in the table; its
# order defines the index of each neighborhood in the arrays built below.
neighborhood_names = sorted(
    {hood for hoods in zip_neighborhoods.values() for hood in hoods}
)

num_employees = np.zeros(len(neighborhood_names))
num_establishments = np.zeros(len(neighborhood_names))
Expand Down Expand Up @@ -130,50 +69,9 @@

orig_file.close()

## calculate population sizes to normalize data

# population[i] is the summed census count for neighborhood_names[i].
population = np.zeros(len(neighborhood_names))

# `with` guarantees the census file is closed even if a line fails to parse.
with open("DECENNIALDHC2020.P1-Data.txt", 'r') as census_file:
    # Skip the first two lines (presumably header rows -- confirm against the
    # census export); data starts on the third line.
    census_file.readline()
    census_file.readline()

    for census_line in census_file:
        # Whitespace-separated fields: field 2 is used as the zip code and
        # field 3 as the population count.
        census_split = census_line.split()

        try:
            hoods = zip_neighborhoods[census_split[2]]
        except (IndexError, KeyError):
            # blank/short line, or not one of our zipcodes
            continue

        # A zip may map to one name (str) or to several (list). Previously a
        # list value could never be found in neighborhood_names, the resulting
        # error was swallowed by a bare `except`, and every shared-zip
        # neighborhood kept population 0 (hence `inf` in the normalized output).
        if isinstance(hoods, str):
            hoods = [hoods]

        for hood in hoods:
            try:
                neigh_idx = neighborhood_names.index(hood)
            except ValueError:
                # name is not one of the reported neighborhoods
                continue
            # NOTE(review): the zip's full population is credited to every
            # neighborhood sharing it; the shared-zip rows in the output carry
            # identical establishment/employee counts, so this presumably
            # mirrors how the employment tallies are attributed -- confirm.
            population[neigh_idx] += int(census_split[3])

# save results to file
# Writes one tab-separated row per neighborhood to parsed_employment_data.txt.
# NOTE(review): this span contains two header writes and two row writes -- a
# 12-column form (with POP, NORM_* and OVERLAPPING_NEIGHBORHOODS) and a
# 6-column form. It looks like the old and new versions of the writer are
# interleaved; confirm which one is intended before running.
# NOTE(review): the loop body below carries no indentation in this view.
out = open("parsed_employment_data.txt", 'w')
# 12-column header (population-normalized layout).
out.write("NEIGHBORHOOD\tNUM_EST\tNUM_EMP\tPAY_ANN\tPAY_QTR1\tPOP\tNORM_EST\tNORM_EMP\tNORM_PAY_ANN\tNORM_PAY_QTR1\tANN_PAY_DIV_EMP\tOVERLAPPING_NEIGHBORHOODS\n")
# 6-column header (layout without population columns).
out.write("NEIGHBORHOOD\tNUM_EST\tNUM_EMP\tPAY_ANN\tPAY_QTR1\tANN_PAY_DIV_EMP\n")
for i in range(len(neighborhood_names)):
# 12-column row: raw counts truncated to int, then each value divided by
# population[i], then annual payroll per employee. Ends with a tab, not a
# newline -- the overlap column written below terminates the line.
# The operands are numpy values, so a zero population yields inf/nan rather
# than raising (the `inf` entries in the generated file come from this).
out.write(neighborhood_names[i] + '\t' + str(int(num_establishments[i])) + '\t' + str(int(num_employees[i])) + '\t' + str(int(ann_payroll[i])) + '\t' + str(int(q1_payroll[i])) + '\t' + str(int(population[i])) + '\t' + str(num_establishments[i]/population[i]) + '\t' + str(num_employees[i]/population[i]) + '\t' + str(ann_payroll[i]/population[i]) + '\t' + str(q1_payroll[i]/population[i]) + '\t' + str(ann_payroll[i]/num_employees[i]) + '\t')

# write the list of neighborhoods that overlap by zipcode, if applicable (sometimes happens in the Cambridge area)
# in_list records whether this neighborhood appears in any overlap group;
# to_write accumulates the other members of that group, each followed by ", ".
in_list = False
to_write = ""
for overlap_list in overlapping_neighs:
if neighborhood_names[i] in overlap_list:
for j in range(len(overlap_list)):
if overlap_list[j] != neighborhood_names[i]:
to_write += overlap_list[j] + ", "
in_list = True

# [:-2] drops the trailing ", " left by the accumulation above.
if in_list:
out.write(to_write[:-2] + '\n')
else:
out.write("NA\n")

# 6-column row: raw counts truncated to int plus annual payroll per employee.
out.write(neighborhood_names[i] + '\t' + str(int(num_establishments[i])) + '\t' + str(int(num_employees[i])) + '\t' + str(int(ann_payroll[i])) + '\t' + str(int(q1_payroll[i])) + '\t' + str(ann_payroll[i]/num_employees[i]) + '\n')
out.close()
66 changes: 37 additions & 29 deletions data/employment_opportunities/parsed_employment_data.txt
Original file line number Diff line number Diff line change
@@ -1,29 +1,37 @@
NEIGHBORHOOD NUM_EST NUM_EMP PAY_ANN PAY_QTR1 POP NORM_EST NORM_EMP NORM_PAY_ANN NORM_PAY_QTR1 ANN_PAY_DIV_EMP OVERLAPPING_NEIGHBORHOODS
Allston/Brighton 1230 21554 1543275 364162 70330 0.01748898052040381 0.30646950092421443 21.943338546850562 5.177904166074222 71.60039899786582 NA
Back Bay/Beacon Hill 3377 106809 15947996 4634691 29266 0.11538987220665618 3.649593384815144 544.9325497163944 158.36434770723707 149.3132226685017 NA
Central Boston 5193 167140 25844167 7318903 38163 0.13607420800251552 4.37963472473338 677.2048057018578 191.78007494169745 154.62586454469306 NA
Charlestown 503 12664 999317 230729 19120 0.02630753138075314 0.6623430962343096 52.26553347280335 12.067416317991631 78.91006001263423 NA
Dorchester 1648 23776 1419043 334213 110283 0.014943372958660899 0.21559079821912716 12.867286889185097 3.03050334140348 59.683840847913864 NA
East Boston 853 20398 1217494 265088 43066 0.019806808154925 0.4736451028653694 28.270422142757628 6.155389402312729 59.68693009118541 NA
Fenway/Kenmore 1254 112831 8997755 2052613 58530 0.02142491030240902 1.9277464548094994 153.72894242268922 35.06941739279002 79.74541571022148 NA
Hyde Park 458 4965 275167 60201 35102 0.013047689590336732 0.1414449319127115 7.8390689989174405 1.7150304825935845 55.42134944612286 NA
Jamaica Plain 703 13852 965731 226731 38284 0.018362762511754258 0.36182217114199144 25.225446661790826 5.922343537770348 69.7178024833959 NA
Mattapan 242 1899 81688 18476 22734 0.0106448491246591 0.08353127474267617 3.593208410310548 0.8127034397818246 43.01632438125329 NA
Roslindale 455 5372 250446 55707 30544 0.014896542692509167 0.17587742273441592 8.199515453116815 1.8238279203771608 46.62062546537602 NA
Roxbury 865 13006 886976 194998 75519 0.011454071160899906 0.172221560137184 11.745070776890584 2.582105165587468 68.19744733200062 NA
South Boston 1798 68890 10619740 2950757 43747 0.04109996114019247 1.5747365533636593 242.75356024413102 67.4504994628203 154.15502975758454 NA
South End 894 20560 1402286 302364 29073 0.030750180579919514 0.7071853609878581 48.23327485983559 10.400165101640697 68.2045719844358 NA
West Roxbury 493 5323 248980 57943 27111 0.01818450075615064 0.1963409686105271 9.183726162812142 2.1372505625023055 46.774375352244974 NA
The Port 1173 52093 9139616 2335424 0 inf inf inf inf 175.4480640393143 Mid-Cambridge, Riverside, Cambridgeport, Area 2/MIT
Neighborhood Nine 503 8205 982498 254600 21447 0.023453163612626473 0.3825709889495034 45.81050962838626 11.8711241665501 119.74381474710542 NA
Wellington-Harrington 389 14403 1872232 543695 0 inf inf inf inf 129.98903006318127 North Cambridge, East Cambridge
Mid-Cambridge 1173 52093 9139616 2335424 0 inf inf inf inf 175.4480640393143 The Port, Riverside, Cambridgeport, Area 2/MIT
North Cambridge 389 14403 1872232 543695 0 inf inf inf inf 129.98903006318127 Wellington-Harrington, East Cambridge
Cambridge Highlands 1108 44379 3420910 807398 0 inf inf inf inf 77.08398116226142 Strawberry Hill, West Cambridge, Baldwin
Strawberry Hill 1108 44379 3420910 807398 0 inf inf inf inf 77.08398116226142 Cambridge Highlands, West Cambridge, Baldwin
West Cambridge 1108 44379 3420910 807398 0 inf inf inf inf 77.08398116226142 Cambridge Highlands, Strawberry Hill, Baldwin
Riverside 1173 52093 9139616 2335424 0 inf inf inf inf 175.4480640393143 The Port, Mid-Cambridge, Cambridgeport, Area 2/MIT
Cambridgeport 1173 52093 9139616 2335424 0 inf inf inf inf 175.4480640393143 The Port, Mid-Cambridge, Riverside, Area 2/MIT
Area 2/MIT 1173 52093 9139616 2335424 0 inf inf inf inf 175.4480640393143 The Port, Mid-Cambridge, Riverside, Cambridgeport
East Cambridge 389 14403 1872232 543695 0 inf inf inf inf 129.98903006318127 Wellington-Harrington, North Cambridge
Baldwin 1108 44379 3420910 807398 0 inf inf inf inf 77.08398116226142 Cambridge Highlands, Strawberry Hill, West Cambridge
NEIGHBORHOOD NUM_EST NUM_EMP PAY_ANN PAY_QTR1 ANN_PAY_DIV_EMP
Allston 539 8304 596490 139035 71.83164739884393
Area 2/MIT 1173 52093 9139616 2335424 175.4480640393143
Back Bay 1979 67148 9874471 2775575 147.05532554953237
Baldwin 1108 44379 3420910 807398 77.08398116226142
Beacon Hill 1087 19066 2139545 594518 112.21782230147907
Brighton 656 12837 915721 217918 71.33450183064579
Cambridge Highlands 1108 44379 3420910 807398 77.08398116226142
Cambridgeport 1173 52093 9139616 2335424 175.4480640393143
Charlestown 503 12664 999317 230729 78.91006001263423
Chinatown 963 33148 4271232 1225276 128.85338481959695
Dorchester 1648 23776 1419043 334213 59.683840847913864
Downtown 3556 107148 20238471 6044190 188.8833296001792
East Boston 853 20398 1217494 265088 59.68693009118541
East Cambridge 389 14403 1872232 543695 129.98903006318127
Fenway 540 51174 4156938 957585 81.23144565599719
Hyde Park 458 4965 275167 60201 55.42134944612286
Jamaica Plain 703 13852 965731 226731 69.7178024833959
Longwood 714 61657 4840817 1095028 78.51204242827254
Mattapan 242 1899 81688 18476 43.01632438125329
Mid-Cambridge 1173 52093 9139616 2335424 175.4480640393143
Mission Hill 176 3811 218842 50239 57.423773287850956
Neighborhood Nine 503 8205 982498 254600 119.74381474710542
North Cambridge 503 8205 982498 254600 119.74381474710542
North End 180 1805 90787 15939 50.297506925207756
Riverside 1173 52093 9139616 2335424 175.4480640393143
Roslindale 455 5372 250446 55707 46.62062546537602
Roxbury 689 9195 668134 144759 72.66275149537792
South Boston 700 9465 770687 199845 81.42493396724775
South Boston Waterfront 1098 59425 9849053 2750912 165.73921750105174
South End 894 20560 1402286 302364 68.2045719844358
Strawberry Hill 1108 44379 3420910 807398 77.08398116226142
The Port 1173 52093 9139616 2335424 175.4480640393143
Wellington-Harrington 389 14403 1872232 543695 129.98903006318127
West Cambridge 1108 44379 3420910 807398 77.08398116226142
West End 744 43436 4759674 1098352 109.57901280044203
West Roxbury 493 5323 248980 57943 46.774375352244974
40 changes: 40 additions & 0 deletions data/transportation/mbta/zips_to_hoods.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
zip neighborhoods
2139 Mid-Cambridge,Riverside,The Port,Cambridgeport,Area 2/MIT
2138 Strawberry Hill,West Cambridge,Baldwin,Cambridge Highlands
2140 North Cambridge,Neighborhood Nine
2141 Wellington-Harrington,East Cambridge
2134 Allston
2116 Back Bay
2114 West End
2108 Beacon Hill
2135 Brighton
2113 North End
2109 Downtown
2110 Downtown
2199 Downtown
2203 Downtown
2215 Fenway
2222 Downtown
2283 Downtown
2284 Downtown
2293 Downtown
2297 Downtown
2298 Downtown
2129 Charlestown
2111 Chinatown
2121 Roxbury
2122 Dorchester
2124 Dorchester
2125 Dorchester
2128 East Boston
2136 Hyde Park
2130 Jamaica Plain
2115 Longwood
2126 Mattapan
2120 Mission Hill
2119 Roxbury
2127 South Boston
2210 South Boston Waterfront
2118 South End
2131 Roslindale
2132 West Roxbury
11 changes: 4 additions & 7 deletions src/routes/dashboard/Dashboard.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,8 @@
}
});
const bostonUrl = 'https://raw.githubusercontent.com/yoakiyama/zoning-dashboard-fp/main/data/employment_opportunities/parsed_employment_data.txt';
salaryData = await d3.tsv(bostonUrl, (d) => {
const salaryUrl = 'https://raw.githubusercontent.com/yoakiyama/zoning-dashboard-fp/main/data/employment_opportunities/parsed_employment_data.txt';
salaryData = await d3.tsv(salaryUrl, (d) => {
return {
neighborhood: d.NEIGHBORHOOD,
avg_salary: +d.ANN_PAY_DIV_EMP,
Expand Down Expand Up @@ -645,11 +645,8 @@
// Coloring of neighborhoods by salary
$: {
if (map && salaryLayerId && salaryColor) {
minSalary = 40;
maxSalary = 180;
map.setPaintProperty(salaryLayerId, 'fill-color', [
'case',
Expand Down Expand Up @@ -745,8 +742,8 @@
$ :{
if (clickedNeighborhood !== null || workingNeighborhood !== null) {
const layers = [
{ id: rentOutlineLayerId},
{ id: commuteLineLayerId},
{ id: rentOutlineLayerId},
{ id: commuteLineLayerId},
{ id: salaryLineLayerId},
];
for (const layer of layers) {
Expand Down

0 comments on commit 46300a3

Please sign in to comment.