import numpy as np
import pickle
from tqdm import tqdm
import torch
from torch import nn
import pandas as pd
import geopandas as gpd
import random
import copy
import matplotlib.pyplot as plt
from scipy import spatial
from sklearn.cluster import KMeans
import time
from haversine import haversine
import networkx as nx
import multiprocessing as mp
from scipy import stats
import statistics
import torch_geometric
from termcolor import cprint, colored
import sys
from collections import OrderedDict
import datetime

from model_all import Model
from args import *
from my_constants import *
from utils import *

args = make_args()
print(args)

JUMP = 10000      # number of trips decoded per batch in gen_paths_no_hierarchy
MAX_ITERS = 300   # maximum number of generation steps (edges) per trip
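# trip_length: total length of a path given as a list of internal edge ids.
# Each id is mapped back to its OSM edge via `backward` and `map_edge_id_to_u_v`,
# and the per-edge "length" attribute (scaled to km in __main__) is summed.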
def trip_length(path):
    global graph, backward
    return sum([graph[map_edge_id_to_u_v[backward[e]][0]][map_edge_id_to_u_v[backward[e]][1]][0]["length"] for e in path])
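# intersections_and_unions: length-weighted overlap between two paths, treated as
# sets of edges. Returns (length of the shared edges, length of the union of edges);
# the intersection length feeds the precision/recall computation in evaluate_no_hierarchy.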
def intersections_and_unions(path1, path2):
    global graph, backward
    path1, path2 = set(path1), set(path2)
    intersection = sum([graph[map_edge_id_to_u_v[backward[e]][0]][map_edge_id_to_u_v[backward[e]][1]][0]["length"] for e in path1.intersection(path2)])
    union = sum([graph[map_edge_id_to_u_v[backward[e]][0]][map_edge_id_to_u_v[backward[e]][1]][0]["length"] for e in path1.union(path2)])
    return intersection, union
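# shorten_path: truncate a generated path at the edge whose end node is closest
# (by haversine distance) to the start node of the true destination edge. Used when
# greedy generation exhausts its iterations without reaching the destination.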
def shorten_path(path, true_dest):
    global map_edge_id_to_u_v, backward, map_node_osm_to_coords
    dest_node = map_edge_id_to_u_v[backward[true_dest]][0]
    _, index = min([(haversine(map_node_osm_to_coords[map_edge_id_to_u_v[backward[edge]][1]], map_node_osm_to_coords[dest_node]), i) for i, edge in enumerate(path)])
    return path[:index+1]
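# gen_paths_no_hierarchy: split the test trips into chunks of JUMP, decode each
# chunk with gen_paths_no_hierarchy_helper, and flatten the results.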
def gen_paths_no_hierarchy(all_paths):
    global JUMP
    ans = []
    for i in tqdm(list(range(0, len(all_paths), JUMP)), desc="batch_eval", dynamic_ncols=True):
        temp = all_paths[i:i+JUMP]
        ans.append(gen_paths_no_hierarchy_helper(temp))
    return [t for sublist in ans for t in sublist]
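# gen_paths_no_hierarchy_helper: greedy decoding of a batch of trips in lockstep.
# Every pending trip keeps its current edge; the model scores all candidate next
# edges (padded to max_nbrs per edge) against the trip's destination, and the
# argmax neighbour is appended. A trip leaves `pending` once it reaches its true
# destination edge, picks a -1 padding slot, or MAX_ITERS steps are exhausted.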
def gen_paths_no_hierarchy_helper(all_paths):
    global model, node_nbrs, max_nbrs, edge_to_node_mapping
    true_paths = [p for _, p, _ in all_paths]
    model.eval()
    gens = [[t[0]] for t in true_paths]
    pending = OrderedDict({i: None for i in range(len(all_paths))})
    with torch.no_grad():
        for _ in tqdm(range(MAX_ITERS), desc="generating trips in lockstep", dynamic_ncols=True):
            true_paths = [all_paths[i][1] for i in pending]
            current_temp = [gens[i][-1] for i in pending]
            assert not args.traffic, colored('Not implemented', 'red')
            current = [c for c in current_temp for _ in node_nbrs[c]]
            pot_next = [nbr for c in current_temp for nbr in node_nbrs[c]]
            dests = [t[-1] for c, t in zip(current_temp, true_paths) for _ in (node_nbrs[c] if c in node_nbrs else [])]
            unnormalized_confidence = model(current, dests, pot_next)
            chosen = torch.argmax(unnormalized_confidence.reshape(-1, max_nbrs), dim=1)
            chosen = chosen.detach().cpu().tolist()
            pending_trip_ids = list(pending.keys())
            for identity, choice in zip(pending_trip_ids, chosen):
                choice = node_nbrs[gens[identity][-1]][choice]
                if choice == -1:
                    # the model picked a padding slot, so this trip cannot be extended further
                    del pending[identity]
                    continue
                gens[identity].append(choice)
                if choice == all_paths[identity][1][-1]:
                    # destination edge reached
                    del pending[identity]
            if len(pending) == 0:
                break
    torch.cuda.empty_cache()
    # trips that never reached the true destination are truncated at their closest point to it
    gens = [shorten_path(gen, true[1][-1]) if gen[-1] != true[1][-1] else gen for gen, true in zip(gens, all_paths)]
    model.train()
    return gens
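# evaluate_no_hierarchy: generate a route for each sampled trip (greedy or
# Dijkstra-based) and report length-weighted precision/recall plus reachability.
# For example, if a true route covers 3.0 km, the generated route covers 2.0 km,
# and 1.5 km of road is shared, then precision = 1.5/2.0 = 75% and recall = 1.5/3.0 = 50%.
# Reachability is the haversine distance (reported in metres) between the endpoints
# of the generated and true final edges.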
def evaluate_no_hierarchy(data, num=1000, with_dijkstra=False):
    global map_node_osm_to_coords, map_edge_id_to_u_v, backward
    to_do = ["precision", "recall", "reachability", "avg_reachability", "acc", "nll", "generated"]
    results = {s: None for s in to_do}
    cprint("Evaluating {} number of trips".format(num), "magenta")
    partial = random.sample(data, num)
    t1 = time.time()
    if with_dijkstra:
        gens = [dijkstra(t) for t in tqdm(partial, desc="Dijkstra for generation", unit="trip", dynamic_ncols=True)]
    else:
        gens = gen_paths_no_hierarchy(partial)
    elapsed = time.time() - t1
    results["time"] = elapsed
    preserved_with_stamps = partial.copy()
    partial = [p for _, p, _ in partial]
    print("Without correction (everything is weighted according to the edge lengths)")
    generated = list(zip(partial, gens))
    generated = [(t, g) for t, g in generated if len(t) > 1]
    lengths = [(trip_length(t), trip_length(g)) for (t, g) in generated]
    inter_union = [intersections_and_unions(t, g) for (t, g) in generated]
    inters = [inter for inter, union in inter_union]
    lengths_gen = [l_g for l_t, l_g in lengths]
    lengths_true = [l_t for l_t, l_g in lengths]
    precs = [i/l if l > 0 else 0 for i, l in zip(inters, lengths_gen)]
    precision1 = round(100*sum(precs)/len(precs), 2)
    recs = [i/l if l > 0 else 0 for i, l in zip(inters, lengths_true)]
    recall1 = round(100*sum(recs)/len(recs), 2)
    deepst_accs = [i/max(l1, l2) for i, l1, l2 in zip(inters, lengths_true, lengths_gen) if max(l1, l2) > 0]
    deepst = round(100*sum(deepst_accs)/len(deepst_accs), 2)
    num_reached = len([None for t, g in generated if t[-1] == g[-1]])
    # distance between corresponding endpoints of the generated and true final edges
    lefts = [haversine(map_node_osm_to_coords[map_edge_id_to_u_v[backward[g[-1]]][0]], map_node_osm_to_coords[map_edge_id_to_u_v[backward[t[-1]]][0]]) for t, g in generated]
    rights = [haversine(map_node_osm_to_coords[map_edge_id_to_u_v[backward[g[-1]]][1]], map_node_osm_to_coords[map_edge_id_to_u_v[backward[t[-1]]][1]]) for t, g in generated]
    reachability = [(l+r)/2 for (l, r) in zip(lefts, rights)]
    all_reach = np.mean(reachability)
    all_reach = round(1000*all_reach, 2)  # km -> m
    if len(reachability) != num_reached:
        reach = sum(reachability)/(len(reachability) - num_reached)
    else:
        reach = 0
    reach = round(1000*reach, 2)
    percent_reached = round(100*(num_reached/len(reachability)), 2)
    print()
    cprint("Precision is {}%".format(precision1), "green")
    cprint("Recall is {}%".format(recall1), "green")
    print()
    cprint("%age of trips reached is {}%".format(percent_reached), "green")
    cprint("Avg Reachability (across all trips) is {}m".format(all_reach), "green")
    print()
    results["precision"] = precision1
    results["reachability"] = percent_reached
    results["avg_reachability"] = (all_reach, reach)
    results["recall"] = recall1
    results["generated"] = list(zip(preserved_with_stamps, gens))
    return results
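# load_model: load the pickled support file (edge id mappings and node coordinates)
# and the serialized model, and build a DiGraph over internal edge ids that connects
# each edge to its non-padding neighbours from node_nbrs.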
def load_model(path_model, path_extras):
    f = open(path_extras, 'rb')
    forward, map_node_osm_to_coords, map_edge_id_to_u_v = pickle.load(f)
    f.close()
    backward = {forward[k]: k for k in forward}
    node_nbrs = create_node_nbrs(forward)
    transformed_graph = nx.DiGraph()
    for e1 in node_nbrs:
        for e2 in node_nbrs[e1]:
            if e2 != -1:
                transformed_graph.add_edge(e1, e2)
    print('Support file loaded')
    model = torch.load(path_model)
    model.eval()
    print('Model loaded')
    return model, map_node_osm_to_coords, map_edge_id_to_u_v, forward, backward, node_nbrs, transformed_graph
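# load_data_and_test: convenience wrapper that loads a pickled test set and runs the
# greedy evaluation over all of its trips (relies on the globals initialised in __main__).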
def load_data_and_test(path_data):
    global model, map_node_osm_to_coords, map_edge_id_to_u_v, forward, backward, node_nbrs, transformed_graph
    data = load_test_data(args, forward, fname=path_data)
    results = evaluate_no_hierarchy(data=data, num=len(data), with_dijkstra=False)
    return results
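# dijkstra: NeuroMLR-Dijkstra generation for a single trip. The model scores every
# (edge, neighbour) transition in the transformed graph conditioned on the trip's
# destination; the scores are converted to negative log-likelihood edge weights, and
# nx.dijkstra_path then returns the minimum-NLL route from source to destination.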
def dijkstra(true_trip):
    global args, transformed_graph, max_nbrs, model
    assert args.loss == "v2", "I don't think this will work for loss v1"
    _, (src, *_, dest), (s, _) = true_trip
    g = transformed_graph
    with torch.no_grad():
        current_temp = [c for c in g.nodes()]
        current = [c for c in current_temp for _ in (node_nbrs[c] if c in node_nbrs else [])]
        pot_next = [nbr for c in current_temp for nbr in (node_nbrs[c] if c in node_nbrs else [])]
        dests = [dest for c in current_temp for _ in (node_nbrs[c] if c in node_nbrs else [])]
        traffic = None
        unnormalized_confidence = model(current, dests, pot_next, traffic)
        # negative log-softmax turns the model scores into per-transition NLL costs
        unnormalized_confidence = -1*torch.nn.functional.log_softmax(unnormalized_confidence.reshape(-1, max_nbrs), dim=1)
        transition_nll = unnormalized_confidence.detach().cpu().tolist()
    torch.cuda.empty_cache()
    count = 0
    for u in g.nodes():
        for i, nbr in enumerate(node_nbrs[u]):
            if nbr == -1:
                break
            g[u][nbr]["nll"] = transition_nll[count][i]
        count += 1
    path = nx.dijkstra_path(g, src, dest, weight="nll")
    return path
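# Driver: load the model and support files, prepare the road graph and padded
# neighbour lists, build the edge-level transformed graph, load the test trips, and
# run the evaluation (greedy by default, Dijkstra if args.with_dijkstra is set).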
if __name__ == "__main__":
model, map_node_osm_to_coords, map_edge_id_to_u_v, forward, backward, node_nbrs, transformed_graph = load_model(path_model = MODEL_SAVE_PATH, path_extras = MODEL_SUPPORT_PATH)
run_dijkstra = args.with_dijkstra
if run_dijkstra:
cprint("Running NeuroMLR-Dijkstra", "yellow")
else:
cprint("Running NeuroMLR-Greedy", "yellow")
args = model.args
print(model.device)
model.eval()
f = open(PICKLED_GRAPH,'rb')
graph = pickle.load(f)
for e in graph.edges(data=True):
e[2]['length'] = e[2]['length']/1000
f.close()
max_nbrs = max(len(nbr_array) for nbr_array in node_nbrs.values())
for u in range(len(forward)):
if u in node_nbrs:
node_nbrs[u].extend([-1]*(max_nbrs - len(node_nbrs[u])))
else:
node_nbrs[u] = [-1]*max_nbrs
nodes_used = set()
for e in forward:
u,v = map_edge_id_to_u_v[e]
nodes_used.add(u)
nodes_used.add(v)
nodes_used = list(nodes_used)
nodes_forward = {node:i for i,node in enumerate(nodes_used)}
edge_to_node_mapping = {forward[e]:(nodes_forward[map_edge_id_to_u_v[e][0]], nodes_forward[map_edge_id_to_u_v[e][1]]) for e in forward}
edge_to_node_mapping[-1] = (-1,-1)
transformed_graph = nx.DiGraph() # required for Dijkstra
for e1 in node_nbrs:
for e2 in node_nbrs[e1]:
if e2 != -1:
transformed_graph.add_edge(e1, e2)
# data = load_test_data(args, forward, fname = TEST_TRIP_DATA_PICKLED_WITH_TIMESTAMPS)
data = load_test_data(args, forward, fname = TEST_TRIP_SMALL_FIXED_DATA_PICKLED_WITH_TIMESTAMPS)
if run_dijkstra:
cprint("Running Dijkstra takes long. Consider sampling a small test set, if required", "red")
data = random.sample(data, 500)
results = evaluate_no_hierarchy(data = data, num = min(10000,len(data)), with_dijkstra = run_dijkstra)