-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathsimulation.py
executable file
·191 lines (169 loc) · 8.47 KB
/
simulation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# -*- coding: utf-8 -*-
import csv
import os
import copy
import sys
"""
Created on Thu Jul 31 18:09:55 2014
Creates a simulation for Citi-bike rebalancing
@author: root
"""
# Load every station's dock capacity into a dict keyed by station name.
# note: the input file is produced by Computed_capacity.R
station_capacity_file = "data/station_cap.csv"
with open(station_capacity_file, 'rb') as f:
    # one CSV row per station; if a name repeats, the last row wins
    station_cap = dict(
        (record["station.name"], int(record["station_capacity"]))
        for record in csv.DictReader(f)
    )
# Load the number of bikes available at each station at 4am on each day, as
# recorded in the historical API dump.
# note: the input file is produced by filter_availability.py and
# availability_jake_edit.R
daily_avail = {}        # ymd -> {station name -> available bikes (int)}
num_days_seen = {}      # station name -> number of rows seen for it
running_avail_sum = {}  # station name -> sum of available bikes (float)
station_list = set()    # every station name that appears in the file
station_available_file = "data/station_availability.csv"
with open(station_available_file, 'rb') as f:
    for record in csv.DictReader(f):
        day = record["ymd"]
        name = record["station.name"]
        bikes = record["available_bikes"]
        # create the per-day dict on first sight of this day
        daily_avail.setdefault(day, {})[name] = int(bikes)
        station_list.add(name)
        # accumulate the totals needed for the across-all-days average
        num_days_seen[name] = num_days_seen.get(name, 0) + 1
        running_avail_sum[name] = running_avail_sum.get(name, 0) + float(bikes)

# Average 4am availability of each station over all the days it was seen.
average_daily_avail = dict(
    (name, running_avail_sum[name] / num_days_seen[name])
    for name in running_avail_sum
)
# Load station-to-station distances into a nested dict:
#     station_prox[station][neighbor] -> distance (CSV column "d")
# Each station is also mapped to itself with distance 0.
# NOTE(review): the original comment says the file holds each station's three
# closest stations; this code simply stores every row it is given.
# note: the input file is produced by the unified R script
station_prox = {}
station_prox_list = []
station_prox_file = "data/stationprox.csv"
with open(station_prox_file, 'rb') as f:
    for record in csv.DictReader(f):
        origin = record["station.1"]
        # first sight of this origin: seed its neighbor map with itself at 0
        neighbors = station_prox.setdefault(origin, {origin: 0})
        neighbors[record['station.2']] = float(record["d"])
# Take the simulation strategy from the first command-line argument:
#   "greedy": greedy re-routing to best nearby station on start and destination
#   "rider":  rider flow only, ignoring vans
# Exits with status 1 when the argument is missing or not one of the above.
cli_args = sys.argv[1:]
if not cli_args:
    print("usage: %s <strategy>" % sys.argv[0])
    print("please enter a simulation strategy, either 'greedy' or 'rider'")
    sys.exit(1)
strategy = cli_args[0]
if strategy not in ("greedy", "rider"):
    print("%s is not a valid strategy" % strategy)
    sys.exit(1)
# Replay the actual trips (start station, start time, end station) against a
# simulated bike count per station and print one tab-separated line per trip.
#
# Output columns for a trip that is carried out:
#   starttime,
#   rerouted start station, its availability after the trip, its capacity,
#   distance from the actual start station to the rerouted one,
#   rerouted end station, its availability after the trip, its capacity,
#   distance from the actual end station to the rerouted one
# A discarded trip prints the actual stations with "NA" in place of the
# distances and leaves availability untouched.
trips_sim_file = "data/trips_sim.csv"
with open(trips_sim_file, 'rb') as f:
    reader = csv.DictReader(f)
    # date (year-month-day) of the most recent reset to the 4am snapshot;
    # stays empty until the first reset happens
    last_reset_date = ""
    # dictionary to map station name to current (simulated) availability
    availability = {}

    # loop over each actual trip
    for row in reader:
        start_station = row["start.station.name"]
        end_station = row["end.station.name"]
        # extract year-month-day in d, time in t, and hour of day
        d, t = row["starttime"].split()
        hour = int(t.split(':')[0])

        # trips that touch this station are left out of the simulation
        if "DeKalb Ave & Skillman St" in (start_station, end_station):
            continue

        #
        # Set availability using actual availability at 4am
        #
        # throw out initial trips before 4am on first day seen
        if last_reset_date == "" and hour < 4:
            continue
        # first trip at or after 4am on a day we have not reset yet
        if hour >= 4 and last_reset_date != d:
            last_reset_date = d
            # snapshot for this day; empty when the day is missing entirely
            snapshot = daily_avail.get(d, {})
            for station in station_list:
                if station in snapshot:
                    # availability of this station at 4am on this day
                    availability[station] = snapshot[station]
                else:
                    # fall back to the station's average at 4am across all
                    # days; note this is a float and may be fractional
                    availability[station] = average_daily_avail[station]

        #
        # Set rerouted stations: end station
        #
        # fill ratio at the actual end station
        stationpercent_end = (float(availability[end_station])
                              / station_cap[end_station])
        # keep the original destination unless a better one is found below
        rerouted_end_station = end_station
        # if the actual destination is congested, attempt to re-route
        if strategy == "greedy" and stationpercent_end > .8:
            # running min over nearby stations; the strict comparison keeps
            # the actual destination when nothing is strictly emptier
            current_min = stationpercent_end
            for station in station_prox[end_station]:
                altstationpercent = (float(availability[station])
                                     / station_cap[station])
                if altstationpercent < current_min:
                    current_min = altstationpercent
                    rerouted_end_station = station
        # throw out the trip if the destination has no room for one more bike.
        # An inequality is required here: availability seeded from the float
        # average may never be exactly equal to the integer capacity, and an
        # equality test would then let the station overfill without bound.
        if (availability[rerouted_end_station] + 1
                > station_cap[rerouted_end_station]):
            rerouted_end_station = "NA"

        #
        # Set rerouted stations: start station
        #
        # fill ratio at the actual start station
        stationpercent_start = (float(availability[start_station])
                                / station_cap[start_station])
        # keep the original origin unless a better one is found below
        rerouted_start_station = start_station
        # if the actual origin is starved, attempt to re-route
        if strategy == "greedy" and stationpercent_start < .2:
            # running max over nearby stations; the strict comparison keeps
            # the actual origin when nothing is strictly fuller
            current_max = stationpercent_start
            for station in station_prox[start_station]:
                altstationpercent = (float(availability[station])
                                     / station_cap[station])
                if altstationpercent > current_max:
                    current_max = altstationpercent
                    rerouted_start_station = station
        # throw out the trip if the origin cannot give up one whole bike.
        # As above, a fractional count (e.g. 0.5) never equals 0, so test
        # with an inequality to keep availability from going negative.
        if availability[rerouted_start_station] < 1:
            rerouted_start_station = "NA"

        #
        # Update availability
        #
        if rerouted_start_station != "NA" and rerouted_end_station != "NA":
            # bike leaves the rerouted start and arrives at the rerouted end
            availability[rerouted_start_station] -= 1
            availability[rerouted_end_station] += 1
            print("\t".join(map(str, [
                row["starttime"],
                rerouted_start_station,
                availability[rerouted_start_station],
                station_cap[rerouted_start_station],
                station_prox[start_station][rerouted_start_station],
                rerouted_end_station,
                availability[rerouted_end_station],
                station_cap[rerouted_end_station],
                station_prox[end_station][rerouted_end_station],
            ])))
        else:
            # print discarded trip for bookkeeping
            print("\t".join(map(str, [
                row["starttime"],
                start_station,
                availability[start_station],
                station_cap[start_station],
                "NA",
                end_station,
                availability[end_station],
                station_cap[end_station],
                "NA",
            ])))