-
Notifications
You must be signed in to change notification settings - Fork 25
/
check_streets_for_imagery.py
127 lines (107 loc) · 6.93 KB
/
check_streets_for_imagery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import requests
import pandas as pd
import sys
import os
from shapely import wkb
from shapely.geometry import LineString
# Create CSV from street_edge table with street_edge_id, x1, y1, x2, y2, geom.
# Name it street_edge_endpoints.csv and put it in the root directory, then run this script.
# It will output a CSV called streets_with_no_imagery.csv in the db dir (see OUTPUT_FILE below).
# Then run `make hide-streets-without-imagery` to mark them as deleted in the db.
OUTPUT_FILE = 'db/streets_with_no_imagery.csv'


def write_output(no_imagery_df, curr_street):
    """Write the streets that are missing imagery to OUTPUT_FILE as a CSV.

    Args:
        no_imagery_df: DataFrame with street_edge_id and region_id columns, one row per street missing imagery.
        curr_street: The street that was being checked when the script stopped early, or None if every street
            was checked. When given, it is appended as the final row so that the next run can find its place.
    """
    print()  # Adds newline after the progress percentage, which is written without one.

    # If we aren't done, save the street we were working on as the last row to keep track of our progress.
    if curr_street is not None:
        progress_row = pd.DataFrame(
            {'street_edge_id': curr_street.street_edge_id, 'region_id': curr_street.region_id}, index=[0]
        )
        no_imagery_df = pd.concat([no_imagery_df, progress_row])

    # Convert the id columns from float to int. astype returns a new frame, so the caller's data is left untouched.
    no_imagery_df = no_imagery_df.astype({'street_edge_id': 'int32', 'region_id': 'int32'})

    no_imagery_df.to_csv(OUTPUT_FILE, index=False)
DISTANCE = 0.000135  # Roughly 15 meters expressed in lat/lng degrees. Precision is not important here.


def redistribute_vertices(geom):
    """Return a LineString that follows geom with vertices placed approximately every 15 meters.

    The line is cut into equal-length pieces (always at least one) and a vertex is placed at every cut, so the
    result always includes both ends of geom. Adapted from an answer to this stackoverflow post:
    https://stackoverflow.com/questions/34906124/interpolating-every-x-distance-along-multiline-in-shapely
    """
    # A line shorter than half of DISTANCE rounds down to zero pieces; use a single piece (just the two ends).
    piece_count = max(int(round(geom.length / DISTANCE)), 1)

    vertices = []
    for step in range(piece_count + 1):
        fraction_along = float(step) / piece_count
        vertices.append(geom.interpolate(fraction_along, normalized=True))
    return LineString(vertices)
# Seconds to wait on a single Street View metadata request before treating it as a failed request.
_REQUEST_TIMEOUT = 30


class _MetadataError(Exception):
    """Raised when a metadata response can't be read as either "imagery" or "no imagery"."""


def _has_imagery(url, lat, lng):
    """Return True if the metadata API reports imagery at (lat, lng), False if it reports ZERO_RESULTS.

    Args:
        url: Metadata URL including the API key and radius, without the location parameter.
        lat: Latitude of the point to check.
        lng: Longitude of the point to check.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        _MetadataError: If the response isn't JSON or has any status other than OK or ZERO_RESULTS (for example
            OVER_QUERY_LIMIT or REQUEST_DENIED). Those say nothing about imagery, so they must not be counted.
    """
    response = requests.get(url + '&location=' + str(lat) + ',' + str(lng), timeout=_REQUEST_TIMEOUT)
    try:
        payload = response.json()
    except ValueError:
        # Every JSON decode error raised by requests is a ValueError, whichever version is installed.
        raise _MetadataError('unreadable response (HTTP %d)' % response.status_code)

    status = payload.get('status') if isinstance(payload, dict) else None
    if status == 'OK':
        return True
    if status == 'ZERO_RESULTS':
        return False
    raise _MetadataError('unexpected status %s from the metadata API' % status)


def _street_lacks_imagery(street, endpoint_url, point_url):
    """Return True if the street should be recorded as missing imagery.

    Args:
        street: Row of street data with x1, y1, x2, y2 and a geom whose coords are (lng, lat) pairs.
        endpoint_url: Metadata URL used for the two endpoints (larger search radius).
        point_url: Metadata URL used for the points along the street (smaller search radius).

    Raises:
        requests.exceptions.RequestException, _MetadataError: Propagated from _has_imagery.
    """
    # Check endpoints first. If neither have imagery, we can say it has no imagery and move on.
    first_endpoint_fail = not _has_imagery(endpoint_url, street.y1, street.x1)
    second_endpoint_fail = not _has_imagery(endpoint_url, street.y2, street.x2)
    if first_endpoint_fail and second_endpoint_fail:
        return True
    endpoint_missing = first_endpoint_fail or second_endpoint_fail

    # Check for imagery every 15 meters along the street using a smaller radius than endpoints. We use 25 m for
    # the endpoints to guarantee we have a place for someone to start. Then we use 15 m at every point along the
    # street to ensure that we are not actually finding imagery for a nearby street.
    coords = list(street['geom'].coords)
    n_coord = len(coords)
    n_success = 0
    n_fail = 0
    for coord in coords:
        if _has_imagery(point_url, coord[1], coord[0]):
            n_success += 1
        else:
            n_fail += 1

        # The street is missing imagery if at least 50% of its points have none, or if an endpoint is missing
        # imagery and at least 25% of its points have none. Stop as soon as the outcome is decided either way.
        if n_fail >= 0.5 * n_coord or (endpoint_missing and n_fail >= 0.25 * n_coord):
            return True
        if n_success > 0.75 * n_coord or (not endpoint_missing and n_success > 0.5 * n_coord):
            return False
    return False


def main():
    """Check every street in street_edge_endpoints.csv for Street View imagery and write the ones without it.

    Reads the API key from the GOOGLE_MAPS_API_KEY environment variable. If OUTPUT_FILE already exists, its last
    row is taken to be the street where a previous run stopped, and checking resumes from that street. If a
    request fails, an unexpected response comes back, or the user presses Ctrl-C, progress is saved and the
    script exits with status 1.
    """
    # Read google maps API key from env variable.
    api_key = os.getenv('GOOGLE_MAPS_API_KEY')
    if api_key is None:
        print("Couldn't read GOOGLE_MAPS_API_KEY environment variable.")
        sys.exit(1)

    # Read street edge data from CSV.
    street_data = pd.read_csv('street_edge_endpoints.csv')
    street_data = street_data.sort_values(by=['region_id', 'street_edge_id'])
    n_streets = len(street_data)
    street_data['id'] = range(1, n_streets + 1)

    # Convert geom to Shapely format and add vertices approximately every 15 meters.
    street_data['geom'] = [redistribute_vertices(wkb.loads(g, hex=True)) for g in street_data['geom']]

    # Create dataframe that will hold output data.
    streets_with_no_imagery = pd.DataFrame(columns=['street_edge_id', 'region_id'])

    # Get current progress and remove data we've already checked.
    if os.path.isfile(OUTPUT_FILE):
        streets_with_no_imagery = pd.read_csv(OUTPUT_FILE)
        # A file with only a header (a finished run that found nothing) holds no progress to resume from.
        if not streets_with_no_imagery.empty:
            progress = streets_with_no_imagery.iloc[-1]['street_edge_id']
            resume_ids = street_data.loc[street_data.street_edge_id == progress, 'id']
            if resume_ids.empty:
                print("Couldn't find street_edge_id %s from %s in street_edge_endpoints.csv."
                      % (progress, OUTPUT_FILE))
                sys.exit(1)
            street_data = street_data[street_data.id >= int(resume_ids.iloc[0])]

            # Drop last row, which was only used to hold our current progress through the script.
            streets_with_no_imagery = streets_with_no_imagery.iloc[:-1]

    # Loop through the streets, adding any that are missing GSV imagery to streets_with_no_imagery.
    gsv_base_url = 'https://maps.googleapis.com/maps/api/streetview/metadata?source=outdoor&key=' + api_key
    gsv_url = gsv_base_url + '&radius=15'
    gsv_url_endpoint = gsv_base_url + '&radius=25'
    street_data = street_data.set_index('id')
    for index, street in street_data.iterrows():
        # Print a progress percentage.
        percent_complete = 100 * round(float(index) / n_streets, 4)
        sys.stdout.write("\r%.2f%% complete" % percent_complete)
        sys.stdout.flush()

        try:
            lacks_imagery = _street_lacks_imagery(street, gsv_url_endpoint, gsv_url)
        except (requests.exceptions.RequestException, _MetadataError, KeyboardInterrupt) as e:
            write_output(streets_with_no_imagery, street)
            # Request errors are reported by type only: their messages include the URL, which holds the API key.
            reason = str(e) if isinstance(e, _MetadataError) else type(e).__name__
            sys.stderr.write('\nStopped early (%s). Progress saved to %s\n' % (reason, OUTPUT_FILE))
            sys.exit(1)

        if lacks_imagery:
            new_row = pd.DataFrame(
                {'street_edge_id': street.street_edge_id, 'region_id': street.region_id}, index=[0]
            )
            streets_with_no_imagery = pd.concat([streets_with_no_imagery, new_row])

    print()  # Ends the progress line, which is written without a trailing newline.
    write_output(streets_with_no_imagery, None)


if __name__ == '__main__':
    main()