-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtracks_compare.py
516 lines (439 loc) · 25.4 KB
/
tracks_compare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
############################################################################################
# script which reads gpx tracks and does some analysis
# - plotting all tracks with original data
# - plotting all tracks AND their common sections (common sections are identified via
#   the nearest neighbor algorithm of scikit-learn)
# - plotting the time advantage/lag compared to a reference track over traveled distance
# - speed over traveled distance
############################################################################################
import gpxpy.gpx
import pandas as pd
from datetime import timedelta
import os
import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import tracks_aux as t_aux
import sys
from itertools import permutations
# numpy arrays are printed completely (no "..." summarisation)
np.set_printoptions(threshold=sys.maxsize)

# colour used for plotting, looked up by number
cols_dict = {
    0: 'blue',
    1: 'orange',
    2: 'green',
    3: 'red',
    4: 'purple',
    5: 'brown',
    6: 'pink',
    7: 'olive',
    8: 'cyan',
    10: 'black',
    9: 'magenta',
}

# dictionaries keyed by track number
track_dict = {}                   # complete data of every track
track_const_distance = {}         # way points which are a constant distance (DISTANCE) apart
track_const_distance_common = {}  # part of each track shared by all tracks (nearest neighbor analysis)
range_dict = {}                   # index range of every track inside the stacked point list

# pandas shall display all rows and all columns of a data frame
for _option, _value in (('display.max_rows', None),
                        ('display.max_columns', None),
                        ('display.width', 1000)):
    pd.set_option(_option, _value)

# data frames used throughout the script
points_df = pd.DataFrame(columns=[])  # all way points of the individual track
tmp_df = pd.DataFrame(columns=[])     # helper data frame
nbrs_pd = pd.DataFrame(columns=[])    # nearest neighbor information
times_pd = pd.DataFrame(columns=[])   # times and difference to the reference track
speeds_pd = pd.DataFrame(columns=[])  # speed information between way points

SPEED_THRESH = 2  # filter for speed
DISTANCE = 100    # distance between way points (to reduce the data)

# intermediate results of the track which is currently read
lat, lon, elev = [], [], []
sp, times, speed = [], [], []
points, dist_per_point, distance = [], [], []
cum_elevation, speed_filt, dur = [], [], []
time_diff = []

# data collected over all tracks
lat_all, lon_all, elev_all = [], [], []
file_names = []
matched_points_of_tracks = []

# bookkeeping lists: number of way points per track and start index of the common part
NO_LEN_TRACKS = []
NO_LEN_TRACKS_COMMON = []
START_INDEX_COMMON = []

# reduced data (way points at multiples of DISTANCE) collected over all tracks
lat_red_all, lon_red_all, elev_red_all = [], [], []
distance_from_start_red_all = []
# --------------------------- function definition ---------------------------------------------------
def f_PlausiCheck(nn):
    ''' check whether a tuple of nearest neighbors is plausible, i.e. whether the way points
    of the tuple come from different tracks in one of the allowed combinations

    Relies on the module level names N0_TRACKS (number of tracks), range_dict (index range of
    every track inside the stacked point list) and RANGE_PERMUT (allowed track combinations).

    :parameter nn: nearest neighbors of one way point, given as indices into the stacked point list
    :return: 0: combination of tuple does not match the allowed combination
             1: combination of tuple matches the allowed combination '''
    res = []  # track number found for every way point of the tuple
    for wp in nn:
        # take the first track whose index range contains the way point; -1 marks a way point
        # which belongs to no track at all, so that it can never be mistaken for track 0
        res.append(next((i for i in range(0, N0_TRACKS) if wp in range_dict[i]), -1))
    # the tuple is plausible if the detected tracks form one of the allowed combinations
    return 1 if tuple(res) in RANGE_PERMUT else 0
def f_rangeCheck(nbrs):
    ''' run the plausibility check of f_PlausiCheck for every row of the nearest neighbor result

    The rows are taken from the passed data frame itself (in its own row order), hence the
    function no longer depends on a separately maintained global row count.

    @:param: nbrs: result of the nearest neighbor analysis, one row of neighbor indices per way point
    @:return: res: a list with one entry per row, 1 if the row is plausible, otherwise 0 '''
    # every row is handed over as a plain list of values
    res = [f_PlausiCheck(nn=row) for row in nbrs.values.tolist()]
    return res
# -------------------------------------------------------------- start of the main program ----------------------------
# change current working directory to the folder which holds the gpx files
# NOTE: the path is machine specific and has to be adapted when the script is used elsewhere
os.chdir("C:\\Users\\arwe4\\OX Drive (2)\\My files\\gpx\\compare")
# make a list of available gpx files in folder
f_list = [file for file in os.listdir() if '.gpx' in file]
if not f_list:
    # stop with a clear message instead of failing later on empty lists
    raise SystemExit('no gpx files found in ' + os.getcwd())
# now read the gpx files and output the progress in console
print('reading files...')
for no, f in enumerate(f_list):  # iterate over the list of gpx files
    # print the file name which is currently read
    print(f)
    # add the filename to list which is later used for the matplotlib legend
    file_names.append(f)
    # read the file and store information in gpx object; the file is closed again after parsing
    with open(f) as gpx_file:
        gpx = gpxpy.parse(gpx_file)
    # running sum of the distances between the way points of this file
    total_distance = 0
    # now iterate over the tracks
    for track in gpx.tracks:
        # and segments
        for segment in track.segments:
            # read each point with latitude, longitude, elevation and time from gpx file
            for point_nr, point in enumerate(segment.points):
                # and append data to list
                points.append(point)
                lat.append(point.latitude)
                lon.append(point.longitude)
                elev.append(point.elevation)
                times.append(point.time.time())
                if point_nr == 0:
                    # some post-calculated values are set to 0 for the first point, because they are
                    # based on a difference (data_point(i) - data_point(i-1))
                    speed.append(0)
                    dist_per_point.append(0)
                    distance.append(0)
                    cum_elevation.append(0)
                    speed_filt.append(0)
                    dur.append(0)
                else:
                    prev_point = segment.points[point_nr - 1]
                    # speed between way points; a missing value is replaced by 0
                    # NOTE(review): speed_between presumably returns None for way points without a
                    # usable time difference - confirm against the gpxpy documentation
                    raw_speed = point.speed_between(prev_point)
                    speed.append(raw_speed if raw_speed is not None else 0)
                    # distance between way points
                    dist_per_point.append(point.distance_3d(prev_point))
                    # distance from start to way point (running sum instead of re-summing the list)
                    total_distance += dist_per_point[-1]
                    distance.append(total_distance)
                    # elapsed time from the first point of the file to the current point; the lists
                    # span the whole file, hence the current point is the last list element
                    dur.append(timedelta(hours=times[-1].hour,
                                         minutes=times[-1].minute,
                                         seconds=times[-1].second) -
                               timedelta(hours=times[0].hour,
                                         minutes=times[0].minute,
                                         seconds=times[0].second))
                    # filter speed: the filtered value follows the raw speed, but a jump of more
                    # than SPEED_THRESH against the last filtered value is limited to SPEED_THRESH
                    last_speed_filt = speed_filt[-1]
                    if (speed[-1] - last_speed_filt) > SPEED_THRESH:
                        speed_filt.append((last_speed_filt + last_speed_filt + SPEED_THRESH) / 2)
                    elif (last_speed_filt - speed[-1]) > SPEED_THRESH:
                        speed_filt.append((last_speed_filt + (last_speed_filt - SPEED_THRESH)) / 2)
                    else:
                        speed_filt.append((last_speed_filt + speed[-1]) / 2)
                    # cumulated height: only increases are added, descents keep the last value
                    inc = point.elevation - prev_point.elevation
                    cum_elevation.append(cum_elevation[-1] + inc if inc > 0 else cum_elevation[-1])
    # for later data processing copy all generated data of each track to temporary pandas data frame
    tmp_df['lateral'] = lat
    tmp_df['longitudinal'] = lon
    tmp_df['elevation [m]'] = elev
    tmp_df['cum_elevation [m]'] = cum_elevation
    tmp_df['times [h/m/s]'] = times
    tmp_df['dt_duration [s]'] = dur
    tmp_df['speed [km/h]'] = [s * 3.6 for s in speed]  # convert m/s to kph - non filtered speed
    tmp_df['filt speed [km/h]'] = [s * 3.6 for s in speed_filt]  # convert m/s to kph - filtered speed
    tmp_df['distance from start [m]'] = distance
    # now generate a column which marks the way points close to a multiple of DISTANCE (just to
    # reduce the data), e.g. a 1 indicates that it is a multiple of DISTANCE
    tmp_df['match multiple'] = t_aux.f_FindValuesCloseToMultiple(tmp_df['distance from start [m]'].tolist(), DISTANCE)
    print('track no', no, 'with', len(tmp_df), 'way points')
    # copy temporary pandas data frame to dictionary (each value of the dictionary holds the data of one track)
    track_dict.update({no: tmp_df})
    # now stack all lateral, longitudinal as well as elevation values in list, this is needed for
    # later plotting the original data of tracks
    lat_all.append(lat)
    lon_all.append(lon)
    elev_all.append(elev)
    # reset all lists for next loop
    lat, lon, elev, cum_elevation, dur, times, dist_per_point, s, speed_filt, distance, speed, dur_s = \
        [], [], [], [], [], [], [], [], [], [], [], []
    # reset pandas data frame as well as data per track for next loop
    tmp_df = pd.DataFrame(columns=[])
# number of tracks which have been read
N0_TRACKS = len(track_dict)

# keep per track only the way points marked as multiple of DISTANCE and collect their
# coordinates, distance from start and the number of remaining way points
for track_no in range(N0_TRACKS):
    all_points = track_dict[track_no]
    reduced = all_points[all_points['match multiple'] == 1]
    track_const_distance[track_no] = reduced
    lat_red_all.append(reduced['lateral'].tolist())
    lon_red_all.append(reduced['longitudinal'].tolist())
    distance_from_start_red_all.append(reduced['distance from start [m]'].tolist())
    # length of the reduced track, needed for the later range check
    NO_LEN_TRACKS.append(len(reduced))

# stack the reduced data (the _red_ lists) of all tracks, one track after the other
sum_lat = [value for per_track in lat_red_all[:N0_TRACKS] for value in per_track]
sum_lon = [value for per_track in lon_red_all[:N0_TRACKS] for value in per_track]
sum_distance_from_start = [value
                           for per_track in distance_from_start_red_all[:N0_TRACKS]
                           for value in per_track]

# constants for the later calculation
LEN_ALL_POINTS = len(sum_lat)               # number of stacked way points
NO_LEN_TRACKS_WITH_0 = [0] + NO_LEN_TRACKS  # track lengths with a leading 0
MIN_LEN_TRACK = min(NO_LEN_TRACKS)          # number of way points of the shortest track
# all orders in which the track numbers may show up in a plausible neighbor tuple
RANGE_PERMUT = list(permutations(range(N0_TRACKS)))

# every track occupies a contiguous index range inside the stacked lists,
# e.g. {0: range(0, 256), 1: range(256, 541), 2: range(541, 836), 3: range(836, 1108)}
start = 0
for track_no, n_points in enumerate(NO_LEN_TRACKS[:N0_TRACKS]):
    end = start + n_points
    print((start, end))  # output value in console
    range_dict[track_no] = range(start, end)
    # re-index the reduced track so that its index equals the position in the stacked lists
    track_const_distance[track_no].index = range(start, end)
    start = end
# "nearest neighbor" analysis over the stacked way points of ALL tracks
# 2d array: one row per way point, columns = (lateral, longitudinal)
X = np.column_stack((sum_lat, sum_lon))
# for every way point look up as many neighbors as there are tracks
nbrs = NearestNeighbors(n_neighbors=N0_TRACKS, algorithm='ball_tree')
nbrs = nbrs.fit(X)
distances, indices = nbrs.kneighbors(X)
# one row per way point holding the indices of its neighbors
nbrs_pd = pd.DataFrame(indices)
# mark every row whose tuple of neighbor indices passes the range check (1 = plausible)
nbrs_pd['common'] = f_rangeCheck(nbrs_pd)
# and keep only the rows where plausible neighbors were found
is_common = nbrs_pd['common'] == 1
nbrs_common = nbrs_pd[is_common]
# let all tracks now start where the first common point in all tracks was found
if nbrs_common.empty:
    # without a single plausible neighbor tuple there is nothing to compare
    raise SystemExit('no common way points found - the tracks cannot be compared')
for i in range(N0_TRACKS):
    # define start index, where tracks are equal onwards, ideally row 0 works fine...
    # e.g. [8, 264, 550, 859] which means: the common part of the tracks begins at 8@first track,
    # 264@second track, 550@third track and 859@fourth track
    # the user is asked once per track which row of the table shall be used
    print(nbrs_common.head(15))
    idx = int(input('index please'))
    neighbor_row = nbrs_common.loc[nbrs_common.index[idx], list(range(N0_TRACKS))]
    # the columns of a row are the 1st, 2nd, ... nearest neighbor and need not be in track order,
    # hence pick the neighbor which lies in the index range of track i; column i is only the
    # fallback if no neighbor of the row belongs to this track
    START_INDEX_COMMON.append(next((int(p) for p in neighbor_row if int(p) in range_dict[i]),
                                   int(neighbor_row[i])))
    # keep the way points from the common start onwards
    common_part = track_const_distance[i].loc[START_INDEX_COMMON[i]:, :]
    # and drop the columns which are re-calculated later (e.g. distance from start would be wrong now
    # because way points in front of the common start have been removed)
    track_const_distance_common[i] = common_part.drop(['distance from start [m]', 'match multiple'], axis=1)
    # calculate the length of each track and store it in list
    NO_LEN_TRACKS_COMMON.append(len(track_const_distance_common[i]))
# number of way points of shortest track - this is needed because the shortest track defines the
# length over which a comparison is possible
MIN_LEN_TRACK_COMMON = min(NO_LEN_TRACKS_COMMON)
# now cut all tracks to the length of the shortest track so that a comparison of tracks is possible
for tr in range(N0_TRACKS):
    last_label = START_INDEX_COMMON[tr] + MIN_LEN_TRACK_COMMON - 1
    # an explicit copy is stored because new columns are added to these frames afterwards
    track_const_distance_common[tr] = \
        track_const_distance_common[tr].loc[START_INDEX_COMMON[tr]:last_label, :].copy()
# elapsed time at every way point and the traveled distance since the common start, per track
for track_no in range(N0_TRACKS):
    common_track = track_const_distance_common[track_no]
    for position, label in enumerate(common_track.index):
        stamp = common_track.loc[label, 'times [h/m/s]']
        if position == 0:
            # the first common way point is the reference: remember its time, duration is 0
            START_TIME = stamp
            dur.append(timedelta(hours=0, minutes=0, seconds=0))
            continue
        # time of day of the way point minus time of day of the first common way point
        time_of_point = timedelta(hours=stamp.hour, minutes=stamp.minute, seconds=stamp.second)
        time_of_start = timedelta(hours=START_TIME.hour,
                                  minutes=START_TIME.minute,
                                  seconds=START_TIME.second)
        dur.append(time_of_point - time_of_start)
    # elapsed time of every way point compared to START_TIME
    common_track['elapsed time'] = dur
    # additional column with the traveled distance in fixed steps of DISTANCE
    common_track['traveled_distande [m]'] = [step * DISTANCE for step in range(0, MIN_LEN_TRACK_COMMON)]
    # reset temporary variables for the next track
    dur, START_TIME = [], 0
# collect elapsed time and filtered speed of every track in times_pd / speeds_pd,
# one column per track, named after the track number
for track_no in range(N0_TRACKS):
    column_name = 'Track_{}'.format(track_no)
    common_track = track_const_distance_common[track_no]
    times_pd.loc[:, column_name] = common_track['elapsed time'][0:MIN_LEN_TRACK].values
    speeds_pd.loc[:, column_name] = common_track['filt speed [km/h]'][0:MIN_LEN_TRACK].values
# distance in fixed steps of DISTANCE, used as x axis when plotting the data
distance_steps = [step * DISTANCE for step in range(0, MIN_LEN_TRACK_COMMON)]
times_pd['Distance [m]'] = distance_steps
speeds_pd['Distance [m]'] = distance_steps
# START time correction -----------------------------------------------------------------------------------
# Pauses are removed from the elapsed times: a step between two way points which takes longer than
# 59 seconds is assumed to be a pause and replaced by the duration of the previous step.
# iterate now over the available tracks
for tr in times_pd.columns[0:N0_TRACKS]:
    times_l = []       # corrected duration of every step of the current track
    track_corr_l = []  # corrected elapsed time at every way point of the current track
    time_cum = timedelta(hours=0, minutes=0, seconds=0)
    # duration of the previous step; starts at zero for every track so that a pause at the very
    # first step neither fails nor re-uses a value of the previous track
    cur_diff = timedelta(seconds=0)
    # and then over the several way points within the track
    for i in range(1, len(times_pd.index)):
        # only the time-of-day part (hours, minutes, seconds) of the elapsed times is compared,
        # the day part is ignored
        delta = (timedelta(seconds=times_pd.loc[i, tr].seconds) -
                 timedelta(seconds=times_pd.loc[i - 1, tr].seconds))
        # remember the last stored step as replacement value
        if times_l:
            cur_diff = times_l[-1]
        # if the step is longer than 59 seconds (assuming pausing time) use the previous step
        if delta > timedelta(seconds=59):
            times_l.append(cur_diff)
        else:
            # step is in the valid range, hence append the newly calculated delta time
            times_l.append(delta)
        time_cum += times_l[-1]  # sum the steps of the track
        track_corr_l.append(time_cum)
    # the loop starts at the second way point, but a starting value of 00:00:00 is required
    # in the newly calculated column
    times_l.insert(0, timedelta(seconds=0))
    track_corr_l.insert(0, timedelta(seconds=0))
    # add the corrected elapsed time of the track to the pandas data frame
    times_pd[str(tr) + '_corr'] = track_corr_l
# leave the temporary variables in a clean state
times_l = []
track_corr_l = []
time_cum = timedelta(hours=0, minutes=0, seconds=0)
# now rearrange the columns - the matplotlib part expects them in the order
# corrected times, 'Distance [m]', original times
column_list = list(times_pd.columns)[-N0_TRACKS::1] + ['Distance [m]'] + list(times_pd.columns[:N0_TRACKS])
times_pd = times_pd.reindex(columns=column_list)
# END time correction --------------------------------------------------------------------------------------
# time difference of every track compared to the reference track (the first track)
# only the columns of the second...last track are visited, the first column is the reference
# the difference is calculated in seconds(!), not as time object
tr = times_pd.columns[1:N0_TRACKS]
rows_after_first = list(times_pd.index)[1:]
for t in tr:
    # the first way point gets 0, every further way point the difference
    # "reference track minus current track"
    time_diff = [0] + [
        float(times_pd.loc[row, 'Track_0_corr'].total_seconds() - times_pd.loc[row, t].total_seconds())
        for row in rows_after_first
    ]
    # store the differences of the current track as a new column
    times_pd['diff_track_' + str(t)] = time_diff
    # and reset the temp buffer for the next track
    time_diff = []
## -------------------------------------------------- the data -------------------------------------------------------
#
## --------------------------------------------- 'original tracks ----------------------------------------------------
# collect for every neighbor column the indices of the plausible (common) way points
matched_points_of_tracks.extend(nbrs_common[column].tolist() for column in range(0, N0_TRACKS))

# scatter plot of the original way points, one colour and one legend entry per file
plt.figure(2)
plt.title('original tracks')
plt.ylabel('lateral')
plt.xlabel('longitudinal')
for track_no, (track_lon, track_lat) in enumerate(zip(lon_all, lat_all)):
    plt.scatter(track_lon, track_lat, c=cols_dict[track_no], s=2, label=file_names[track_no])
plt.legend(loc='upper left', markerscale=6)
plt.show()
## ---------------------------------------- 'common sections' --------------------------------------------------------
#plt.figure(3)
#plt.title('common sections')
#plt.ylabel('lateral')
#plt.xlabel('longitudinal')
#
## loop over all tracks
#for i in range(0, N0_TRACKS):
# # and plot each track and give plot a name
# plt.scatter(lon_red_all[i], lat_red_all[i],
# c=cols_dict[i],
# s=5,
# label=file_names[i])
# # plot the common sections
# for no, p in enumerate(matched_points_of_tracks[i]):
# plt.scatter(sum_lon[p], sum_lat[p], c=cols_dict[10], s=5)
#
# # every km and only for track one to avoid to many text
# if no%(1000/DISTANCE)==0 and i==0:
# # plot the distance from start
# plt.text(sum_lon[p],sum_lat[p],s=str(round(sum_distance_from_start[p]/1000,1)))
#plt.legend(loc='upper left', markerscale=6)
#plt.show()
# --------------------------------- time advantage/lag over distance and elevation ------------------------------------
fig, ax1 = plt.subplots()
ax1.set_title(' time advantage/lag over distance')
ax1.set_xlabel('traveled distance [km]')
ax1.set_ylabel(' time [min]')
ax1.grid(True)
# x axis shared by all curves: traveled distance in km
distance_km = times_pd['Distance [m]'] / 1000
# one curve per compared track; the difference columns start behind the corrected times,
# the distance column and the original times
for i in range(1, N0_TRACKS):
    idx = times_pd.columns[i + N0_TRACKS * 2]  # diff_track_... column of track i
    print(idx)
    ax1.plot(distance_km,
             times_pd[idx] / 60,  # difference to the reference track, seconds -> minutes
             c=cols_dict[i],
             label=file_names[i])
# the reference track is drawn as a line at zero
ax1.plot(distance_km,
         [0] * len(times_pd),
         c=cols_dict[0],
         label=file_names[0] + '_REF')
ax1.legend(loc='upper left', markerscale=6)
# elevation of the reference track on a second y axis which shares the x axis
ax2 = ax1.twinx()
ax2.plot(distance_km,
         track_const_distance_common[0]['elevation [m]'],
         c='black',
         linewidth=3)
ax2.set_ylabel('elevation [m]')
plt.show()
## ------------------------------------------------ speeds/ average speed --------------------------------------
#plt.figure(7)
#plt.title('speeds along track')
#plt.xlabel(' km ')
#plt.ylabel('speed')
#for i in range(0,N0_TRACKS):
# plt.plot(times_pd['Distance [m]']/1000,
# track_const_distance_common[i]['filt speed [km/h]'],
# label=file_names[i])
# plt.plot(times_pd['Distance [m]']/1000,
# [track_const_distance_common[i]['filt speed [km/h]'].mean()]*len(times_pd['Distance [m]']),
# c=cols_dict[i])
# y_text=track_const_distance_common[i]['filt speed [km/h]'].tolist()[-1]
#plt.legend(loc='upper left')
#plt.show()