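# nmea_data_convert.py
# Reads a log of NMEA sentences, stamps them with datetimes by cycle, merges related GSV/GSA
# sentences, converts each talker+sentence_type group into a pandas DataFrame, and writes the
# results to CSV files and/or a database (see main() at the bottom of this file).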
import argparse
import os
import re
import sys
from collections import namedtuple
from datetime import datetime
import numpy as np
import pandas as pd
import pynmea2
import db_creds
import db_data_import
import db_table_lists
import db_utils
from column_casting import columns_to_cast, datatype_dict, db_datatypes
# TODO: Overriding the print function isn't a good way to handle this, replace with a custom library that does this
import functools
print = functools.partial(print, flush=True) # Prevent print statements from buffering till end of execution
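# Illustrative invocation (the file path and unique ID below are made up):
#   python nmea_data_convert.py nmea_log.txt both --cycle_start GNRMC --backfill_datetimes --unique_id run_01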
def parse_and_validate_args(passed_args):
parser = argparse.ArgumentParser()
parser.add_argument("filepath",
help="file system path to file containing NMEA data")
parser.add_argument("output_method",
choices=['csv', 'db', 'both'],
help="Where to output data: CSV files, database, or both")
parser.add_argument("--cycle_start", "-cs",
help="Talker+sentence_type, e.g. 'GNRMC', used to key off of for sentence merging and more; "
"must appear once and only once in each cycle, and must be at the beginning of each cycle; "
"must contain date and time information for sentences to be datetime stamped")
parser.add_argument("--num_sentences_per_cycle", "-spc", # Requirement for this is enforced in assign_cycle_ids()
type=int,
help="If the cycle_start argument is not provided, and sentences are not all of type GSV, "
"cycles will be inferred from this argument. Every num_sentences_per_cycle will be given "
"the same cycle_id starting with the first sentence. Sentence merging is based on cycle_id.")
parser.add_argument("--backfill_datetimes", "-bfdt",
action="store_true",
help="Backfill datetimes where missing by extrapolating/interpolating from messages "
"that do have datetime information")
parser.add_argument("--drop_previous_db_tables", "-dropt",
action="store_true",
help="Drop all previous DB tables before importing new data; only applies when output_method is 'db' or 'both'")
parser.add_argument("--unique_id", "-uid",
type=str,
default=None,
help="A unique ID that will be added as a column in all database tables and CSV output files;"
" allows this dataset to be distinguished from other datasets when they are stored together")
args = parser.parse_args(passed_args)
# Check if input file exists
if not os.path.isfile(args.filepath):
sys.exit(f"\nERROR: File '{args.filepath}' does not exist.\n\nExiting.\n")
# Check if num_sentences_per_cycle value is valid
if args.num_sentences_per_cycle is not None:
args.num_sentences_per_cycle = int(args.num_sentences_per_cycle)
if args.num_sentences_per_cycle < 1:
sys.exit(f"\nERROR: num_sentences_per_cycle argument '{args.num_sentences_per_cycle}' is invalid. Must be a positive "
"integer greater than 0.\n\nExiting.\n")
return args
def open_file(filepath):
return open(filepath, encoding='utf-8')
def read_file(file):
sentences = []
for line_idx, line in enumerate(file.readlines()):
try:
sentence = pynmea2.parse(line)
sentences.append(sentence)
except pynmea2.ParseError as e:
print(f'Parse error on line {line_idx+1}: {e}')
continue
if len(sentences) == 0:
sys.exit(f"\nNo data found in {file.name} input file.\nExiting.\n\n")
return sentences
def categorize_sentences(sentences):
# Make a list of all sentence types
sentence_types = []
for dts_sentence in sentences:
if get_sentence_type(dts_sentence.sentence) not in sentence_types:
sentence_types.append(get_sentence_type(dts_sentence.sentence))
# Create a list of sentence sets (a separate list for each sentence type)
sentence_sets = [[] for _ in range(len(sentence_types))]
for dts_sentence in sentences:
type_idx = sentence_types.index(get_sentence_type(dts_sentence.sentence))
sentence_sets[type_idx].append(dts_sentence) # Add each sentence to the appropriate list
return sentence_sets
# Look for the beginning of each cycle, get the timestamp for that cycle,
# and pair the timestamp with every sentence in the cycle
# This function assumes that the sentence starting the cycle contains a date and time stamp,
# which may only be true for RMC sentences
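# Illustrative example (values are made up): an RMC sentence with raw time field 123519
# (hhmmss) and raw date field 230394 (ddmmyy) yields the datetime 1994-03-23 12:35:19,
# which is then attached to every sentence in that cycle.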
def datetime_stamp_sentences(sentences, cycle_start='GNRMC'):
datetime_stamped_sentences = []
date_time = None
for sentence in sentences:
# TODO: Search through cycle to find sentence(s) containing date and/or time
# If sentence is the first in a cycle, get the timestamp
if sentence.talker + sentence.sentence_type == cycle_start:
if hasattr(sentence, 'timestamp'):
time = sentence.timestamp
else:
time = None
if hasattr(sentence, 'datestamp'):
date = sentence.datestamp
else:
date = None
if date and time: # Sentence contains both date and time
date_time = datetime.combine(date, time)
datetime_stamped_sentence = DateTimeStampedSentence(sentence, date_time)
datetime_stamped_sentences.append(datetime_stamped_sentence)
return datetime_stamped_sentences
# To properly assign cycle IDs, the cycle_start talker+sentence_type must appear once and only once in each cycle
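# Illustrative example: with cycle_start 'GNRMC', the stream
#   GNRMC, GNGGA, GPGSV, GLGSV, GNRMC, GNGGA, ...
# is assigned cycle_ids 0, 0, 0, 0, 1, 1, ...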
def assign_cycle_ids(dts_sentences, args):
# TODO: Check database for highest cycle_id and start there so that IDs will be unique across runs of this script
cycle_start = args.cycle_start
if not cycle_start:
cycle_start = 'GNRMC'
unique_talker_and_type_pairs = set([dts_sentence.sentence.talker + dts_sentence.sentence.sentence_type for dts_sentence in dts_sentences])
if cycle_start in unique_talker_and_type_pairs: # If sentences contain cycle_start sentences
cycle_id = -1
for sen_idx, dts_sentence in enumerate(dts_sentences):
if dts_sentence.sentence.talker + dts_sentence.sentence.sentence_type == cycle_start:
cycle_id += 1
dts_sentences[sen_idx].cycle_id = cycle_id
elif len(unique_talker_and_type_pairs) == 1 and \
(('GPGSV' in unique_talker_and_type_pairs) or ('GLGSV' in unique_talker_and_type_pairs)): # If sentences are exclusively 'GPGSV' xor 'GLGSV' sentences
cycle_id = -1
for sen_idx, dts_sentence in enumerate(dts_sentences):
if int(dts_sentence.sentence.msg_num) == 1:
cycle_id += 1
dts_sentences[sen_idx].cycle_id = cycle_id
else:
num_sentences_per_cycle = args.num_sentences_per_cycle
if num_sentences_per_cycle is None:
sys.exit("\n\nERROR: If an argument for cycle_start is not provided or is not valid, and sentences are not exclusively "
"GPGSV xor GLGSV sentences, then the num_sentences_per_cycle argument must be provided.\n\nExiting.\n")
cycle_id = -1
cycle_start_idxs = range(0, len(dts_sentences), num_sentences_per_cycle)
for sen_idx, dts_sentence in enumerate(dts_sentences):
if sen_idx in cycle_start_idxs:
cycle_id += 1
dts_sentences[sen_idx].cycle_id = cycle_id
return dts_sentences
# For sentences from a particular sentence_set (particular talker), if there are sentences of the same sentence_type
# from the same cycle, merge them into one sentence
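# Illustrative example: three GPGSV sentences from the same cycle (msg_num 1, 2, 3) are
# replaced by a single MergedSentence_GSV carrying the SV data from all three.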
def merge_groups(sentence_sets):
for set_idx, sentence_set in enumerate(sentence_sets):
sentence_type = sentence_set[0].sentence.sentence_type
if sentence_type in ['GSV', 'GSA']: # These are the supported sentence types that can be merged
cycle_ids = [dts_sentence.cycle_id for dts_sentence in sentence_set]
cycle_ids = list(set(cycle_ids)) # Keep unique cycle IDs only by converting to set and back to list
sentences_merged = []
for cycle_id in cycle_ids:
merge_group_sentences = [dts_sentence for dts_sentence in sentence_set if dts_sentence.cycle_id == cycle_id]
if len(merge_group_sentences) > 1:
if sentence_type == 'GSV':
merged_sentence = MergedSentence_GSV(merge_group_sentences)
if sentence_type == 'GSA':
merged_sentence = MergedSentence_GSA(merge_group_sentences)
sentence_sets[set_idx].append(merged_sentence)
sentences_merged = sentences_merged + merge_group_sentences
# Remove originals to prevent duplicates, have to do it after all have been processed to prevent indexing issues
for dts_sentence in sentences_merged:
sentence_sets[set_idx].remove(dts_sentence)
return sentence_sets
# Extrapolate missing datetimes
# Look at the first two consecutive cycles with datetimes, take the interval between them, and backfill datetimes
# to cycles without datetimes based on that interval, assuming no data interruptions
# The interval from the first talker+sentence_type dataframe will be used for all dataframes
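# Illustrative example: if cycles 2 and 3 are stamped 10:00:00 and 10:00:01 (a 1-second
# interval), cycles 0 and 1 are backfilled to 09:59:58 and 09:59:59, and any later cycles
# missing datetimes are extrapolated forward in 1-second steps.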
def backfill_datetimes(sentence_dfs, verbose=False):
# Make sure sentences are in order by cycle, they may have gotten out of order when merged
sentence_dfs = sort_dfs(sentence_dfs, sort_by='cycle_id')
# Find time delta (interval) between sentence cycles
interval = None
for df in sentence_dfs:
if not interval:
for sen_idx, _ in enumerate(df['datetime']):
if df['datetime'][sen_idx] and sen_idx+1 < len(df['datetime'])-1 and \
df['datetime'][sen_idx+1] and df['cycle_id'][sen_idx] < df['cycle_id'][sen_idx+1]: # If this dts sentence and the next have valid datetimes
first_datetime = df['datetime'][sen_idx]
second_datetime = df['datetime'][sen_idx+1]
interval = second_datetime - first_datetime
if interval.total_seconds() > 0: # Don't look at sentences of same type in same cycle (interval will be 0)
break
else:
interval = None
if not interval:
if verbose:
print("\n Time delta between sentence cycles could not be determined, so datetimes will not be backfilled.")
return
pd.options.mode.chained_assignment = None # Suppress chained_assignment warnings, default='warn'
# See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# TODO: Investigate how to do this better and remove this warning suppression
for df_idx, df in enumerate(sentence_dfs):
# Find sentences without datetime value
datetime_bools = df['datetime'].isnull()
no_datetime_idxs = datetime_bools[datetime_bools].index.tolist()
# Find first sentence in this dataframe with datetime
first_datetime_idx = (~datetime_bools).idxmax()
# Get lists of indices to process
no_datetime_idxs_preceding = [idx for idx in no_datetime_idxs if idx < first_datetime_idx]
no_datetime_idxs_succeeding = [idx for idx in no_datetime_idxs if idx > first_datetime_idx]
# From first sentence with datetime, fill in preceding sentences moving backwards
no_datetime_idxs_preceding.reverse()
for sen_idx in no_datetime_idxs_preceding:
# If sentence is of same talker+sentence_type and different cycle
if df['cycle_id'][sen_idx] < df['cycle_id'][sen_idx+1]:
df['datetime'][sen_idx] = df['datetime'][sen_idx+1] - interval
# If sentence is of same talker+sentence_type and same cycle
if df['cycle_id'][sen_idx] == df['cycle_id'][sen_idx+1]:
df['datetime'][sen_idx] = df['datetime'][sen_idx+1]
df['datetime_is_interpolated'][sen_idx] = True
# Fill in succeeding sentences' datetimes
for sen_idx in no_datetime_idxs_succeeding:
# If sentence is of same talker+sentence_type and different cycle
if df['cycle_id'][sen_idx] > df['cycle_id'][sen_idx-1]:
df['datetime'][sen_idx] = df['datetime'][sen_idx-1] + interval
# If sentence is of same talker+sentence_type and same cycle
if df['cycle_id'][sen_idx] == df['cycle_id'][sen_idx-1]:
df['datetime'][sen_idx] = df['datetime'][sen_idx-1]
df['datetime_is_interpolated'][sen_idx] = True
# Derive custom data from NMEA data
def derive_data(sentence_dfs):
for df in sentence_dfs:
if df['sentence_type'][0] == 'RMC':
# Derive decimal degree lat/lon format from NMEA DDMM.MMMMM/DDDMM.MMMMM formats
df['latitude'] = df.apply(lambda row : get_coordinate(row['lat'], row['lat_dir'], 'lat'), axis = 1)
df['longitude'] = df.apply(lambda row : get_coordinate(row['lon'], row['lon_dir'], 'lon'), axis = 1)
if df['sentence_type'][0] == 'GNS':
df['mode_indicator_gps'] = df.apply(lambda row : get_position_mode(row['mode_indicator'], 'GPS' ), axis = 1)
df['mode_indicator_glonass'] = df.apply(lambda row : get_position_mode(row['mode_indicator'], 'GLONASS'), axis = 1)
df['mode_indicator_galileo'] = df.apply(lambda row : get_position_mode(row['mode_indicator'], 'Galileo'), axis = 1)
df['mode_indicator_beidou'] = df.apply(lambda row : get_position_mode(row['mode_indicator'], 'BeiDou' ), axis = 1)
def get_coordinate(coord, coord_dir, coord_type):
# 'lat' is in DDMM.MMMMM format, number of decimal places is variable
# 'lon' is in DDDMM.MMMMM format, number of decimal places is variable
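# Illustrative example (values are made up): lat '4807.038' with lat_dir 'N'
# -> 48 + 7.038/60 ≈ 48.1173; lon '01131.000' with lon_dir 'W' -> -(11 + 31.000/60) ≈ -11.5167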
if coord != coord: # If coord is NaN (NaN != NaN)
return coord
if coord_type == 'lat':
degree = float(coord[:2])
minute = float(coord[2:])
if coord_type == 'lon':
degree = float(coord[:3])
minute = float(coord[3:])
coord = degree + minute/60
if coord_dir == 'S' or coord_dir == 'W':
coord = -coord
return coord
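# The GNS mode_indicator is treated here as a positional string with one character per
# constellation, in the order GPS, GLONASS, Galileo, BeiDou (as assumed by get_position_mode()
# below). Illustrative example: 'AANN' would typically mean an autonomous fix from GPS and
# GLONASS and no fix from Galileo or BeiDou.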
def get_position_mode(mode_indicator_str, constellation):
if constellation == 'GPS' :
return mode_indicator_str[0]
if constellation == 'GLONASS':
if len(mode_indicator_str) > 1:
return mode_indicator_str[1]
else:
return None
if constellation == 'Galileo':
if len(mode_indicator_str) > 2:
return mode_indicator_str[2]
else:
return None
if constellation == 'BeiDou' :
if len(mode_indicator_str) > 3:
return mode_indicator_str[3]
else:
return None
def add_uid_to_dfs(sentence_dfs, unique_id):
for df in sentence_dfs:
# Insert the unique ID as the first column
df.insert(0, 'unique_id', unique_id)
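# Convert each set of datetime-stamped sentences into a pandas DataFrame
# (one DataFrame per talker+sentence_type)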
def sentences_to_dataframes(sentence_sets):
dfs = []
for set_idx, sentence_set in enumerate(sentence_sets):
sentence_type = sentence_sets[set_idx][0].sentence.sentence_type
sentence_is_merged = sentence_sets[set_idx][0].sentence_is_merged_from_multiple
fields = sentence_sets[set_idx][0].sentence.fields
# If first sentence is not a merged sentence, make sure that fields allow for merged sentences
if (sentence_type == 'GSV') and (not sentence_is_merged):
fields = expand_GSV_fields(fields)
elif (sentence_type == 'GSA') and (not sentence_is_merged):
fields = expand_GSA_fields(fields)
columns = [column_tuple[1] for column_tuple in fields]
# Add columns for data fields missing from class
# TODO: Fork pynmea2 module to correct
# Issue reported here: https://github.com/Knio/pynmea2/issues/118
if sentence_type == 'RMC':
columns.append('mode')
columns.insert(0, 'cycle_id')
columns.insert(1, 'datetime')
columns.insert(2, 'datetime_is_interpolated')
columns.insert(3, 'sentence_is_merged_from_multiple')
columns.insert(4, 'talker')
columns.insert(5, 'sentence_type')
list_of_data_rows = []
for dts_sentence in sentence_set:
row_data = dts_sentence.sentence.data.copy()
date_time = dts_sentence.date_time
row_data.insert(0, dts_sentence.cycle_id)
row_data.insert(1, date_time)
row_data.insert(2, dts_sentence.datetime_is_interpolated)
row_data.insert(3, dts_sentence.sentence_is_merged_from_multiple)
row_data.insert(4, dts_sentence.sentence.talker)
row_data.insert(5, dts_sentence.sentence.sentence_type)
# For GSV sentences with less data than others, fill with NaNs where there is no data
if sentence_type in ['GSV']:
placeholders = [np.NaN] * (len(columns) - len(row_data))
row_data = row_data + placeholders
# For non-merged GSA sentences with less data than merged sentences, fill with NaNs where there is no data
if sentence_type == 'GSA' and not sentence_is_merged:
placeholders = [np.NaN] * (len(columns) - len(row_data))
# Make sure SV ID data gets put in correct (GP vs GL) columns
sv_ids_in_sentence = row_data[8:20]
glonass_ids = range(65,96+1) # See notes in MergedSentence_GSA class
# Row data consists of: 6 elements inserted above, 2 elements, 12 SV IDs, 3 elements
# Columns/fields consist of: 6 elements inserted above, 2 elements, 12 GP SV IDs, 12 GL SV IDs, 3 elements
if len(set(sv_ids_in_sentence) & set(glonass_ids)): # If there are GLONASS SV IDs in the sentence
row_data = row_data[:-15] + placeholders + row_data[-15:]
else:
row_data = row_data[:-3] + placeholders + row_data[-3:]
list_of_data_rows.append(row_data)
df = pd.DataFrame(list_of_data_rows, columns=columns)
dfs.append(df)
pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.width', 180)
# Correct data types of columns
dfs = [correct_data_types(df) for df in dfs]
return dfs
def sort_dfs(dfs, sort_by='cycle_id', ascending=True):
for df_idx, _ in enumerate(dfs):
dfs[df_idx] = dfs[df_idx].sort_values(sort_by, ascending=ascending)
dfs[df_idx] = dfs[df_idx].reset_index(drop=True)
return dfs
def correct_data_types(df):
df.replace(to_replace=pd.NaT, value='', inplace=True) # Do this replace first because pd.NaT won't be replaced with np.NaN
df.replace(to_replace='', value=np.NaN, inplace=True)
# Cast dataframe data from strings to appropriate datatypes specified in columns_to_cast.py
# sentence_type = df['sentence_type'][0]
# for py_datatype in datatype_dict.keys():
# if (sentence_type, py_datatype) in columns_to_cast: # If key exists in dictionary
# for column in columns_to_cast[sentence_type, py_datatype]:
# df[column] = df[column].astype('float').astype(py_datatype)
# # Cast as float first to get around bug: https://stackoverflow.com/questions/60024262/error-converting-object-string-to-int32-typeerror-object-cannot-be-converted
df['datetime'].replace(to_replace=np.NaN, value=pd.NaT, inplace=True) # Needed for backfill_datetimes() to work properly
return df
def dfs_to_csv(sentence_dfs, input_file_path, verbose=False):
input_file_name = os.path.basename(input_file_path)
input_file_name = os.path.splitext(input_file_name)[0]
for df_idx, df in enumerate(sentence_dfs):
filename = f"{input_file_name}_{df['talker'][0]}{df['sentence_type'][0]}.csv"
df.to_csv(filename, index=False) # Save to cwd
if verbose:
if df_idx == 0: # If this is the first df
print(f"data from logfile '{input_file_path}' written to:")
print(" " + filename)
def dfs_to_db(sentence_dfs, input_file_path, verbose=False):
table_name_base = 'nmea'
# Pass lowercase 'talker_sentencetype' as table name suffixes
table_name_suffixes = [f"{df['talker'][0]}_{df['sentence_type'][0]}".lower() for df in sentence_dfs]
# Construct a dictionary of database datatypes for each column to import, using columns_to_cast
for df in sentence_dfs:
sentence_type = df['sentence_type'][0]
for py_datatype in datatype_dict.keys():
if (sentence_type, py_datatype) in columns_to_cast: # If key exists in dictionary
for column in columns_to_cast[sentence_type, py_datatype]:
db_datatypes[column] = datatype_dict[py_datatype] # Get database datatype for column
table_names = db_data_import.send_data_to_db(input_file_path, sentence_dfs, table_name_base, table_name_suffixes, dtypes=db_datatypes)
if verbose:
print(f"data from logfile '{input_file_path}' written to:")
for table_name in table_names:
print(f" '{table_name}' table in '{db_creds.DB_NAME}' database")
def get_sentence_type(sentence):
return sentence.talker + sentence.sentence_type
class DateTimeStampedSentence:
def __init__(self, sentence, date_time):
# TODO: Check if inputs are of correct types (use isinstance())
# https://stackoverflow.com/questions/14570802/python-check-if-object-is-instance-of-any-class-from-a-certain-module
self.cycle_id = None
self.sentence = sentence # 'sentence' is an instance of class from pynmea2, contains various attributes
self.date_time = date_time
self.datetime_is_interpolated = False
self.sentence_is_merged_from_multiple = False
def __str__(self):
return str(self.cycle_id) + ' ' + str(self.sentence) + ' ' + str(self.date_time)
class MergedSentence_GSV:
#TODO: Have MergedSentence_GSV inherit from DateTimeStampedSentence, or create new base MergedSentence class to inherit from ?
def __init__(self, merge_group):
self.cycle_id = merge_group[0].cycle_id
self.date_time = merge_group[0].date_time
self.datetime_is_interpolated = False
self.sentence_is_merged_from_multiple = True
Sentence = namedtuple('sentence', 'talker sentence_type fields data')
talker = merge_group[0].sentence.talker
sentence_type = merge_group[0].sentence.sentence_type
# Add fields for SVs 5-16 (see expand_GSV_fields() and the Development Notes/Oddities section of README.md for why 16)
fields = expand_GSV_fields(merge_group[0].sentence.fields)
# Merge SV data from the sentences after the first with the data from the first sentence
data = merge_group[0].sentence.data
data[1] = np.NaN # msg_num doesn't apply to merged sentence
for dts_sentence in merge_group[1:]:
data = data + dts_sentence.sentence.data[3:]
self.sentence = Sentence(talker, sentence_type, fields, data)
def __str__(self):
return str(self.cycle_id) + ' ' + str(self.sentence.talker) + ' ' + str(self.sentence.sentence_type) + ' ' + str(self.sentence.data) + ' ' + str(self.date_time)
class MergedSentence_GSA:
# TODO: Have MergedSentence_GSA inherit from DateTimeStampedSentence, or create new base MergedSentence class to inherit from ?
# Separate GSA sentences in the same cycle represent reporting for different constellations
# SV with IDs 1-32 are GPS, 65-96 are GLONASS.
# See https://www.u-blox.com/sites/default/files/products/documents/u-blox8-M8_ReceiverDescrProtSpec_%28UBX-13003221%29.pdf, Appendix A
# TODO: Support more than two GSA sentences per cycle, if necessary
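# Illustrative example: a cycle with one GSA sentence listing GPS SVs (IDs 1-32) and another
# listing GLONASS SVs (IDs 65-96) becomes a single row with separate gp_sv_id* and gl_sv_id* columns.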
def __init__(self, merge_group):
if len(merge_group) != 2:
raise Exception("Merging of two and only two GSA sentences per cycle is currently supported.")
self.cycle_id = merge_group[0].cycle_id
self.date_time = merge_group[0].date_time
self.datetime_is_interpolated = False
self.sentence_is_merged_from_multiple = True
Sentence = namedtuple('sentence', 'talker sentence_type fields data')
talker = merge_group[0].sentence.talker
sentence_type = merge_group[0].sentence.sentence_type
# Create additional sv_id01...sv_id12 fields for the second constellation, and prefix the labels with 'gp_' for GPS and 'gl_' for GLONASS
# Other data is the same between sentences (as observed in limited data)
fields = expand_GSA_fields(merge_group[0].sentence.fields)
# Constellation reporting may not always be in the same order, e.g. GPS may be reported before GLONASS in some
# cycles and after GLONASS in others, so check values and determine which should go first
sentence1_sv_ids = [int(id_) for id_ in merge_group[0].sentence.data[2:14] if id_ != '']
glonass_ids = range(65,96+1)
if len(set(sentence1_sv_ids) & set(glonass_ids)): # If there are GLONASS SV IDs in the first sentence, switch them
merge_group = [merge_group[1], merge_group[0]]
# Merge data from sentences
data = merge_group[0].sentence.data[:-3] + merge_group[1].sentence.data[2:]
self.sentence = Sentence(talker, sentence_type, fields, data)
def __str__(self):
return str(self.cycle_id) + ' ' + str(self.sentence.talker) + ' ' + str(self.sentence.sentence_type) + ' ' + str(self.sentence.data) + ' ' + str(self.date_time)
# Add fields for SVs 5-16. See the Development Notes/Oddities section of README.md
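# Illustrative example: every field whose description ends in '4' (the SV-4 fields) is copied
# and renumbered for SVs 5 through 16, so a merged sentence has columns for up to 16 SVs.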
def expand_GSV_fields(fields):
fields = list(fields) # Make mutable
fields_to_duplicate = [field for field in fields if field[0].endswith('4')] # Original GSV sentence supports 0-4 SVs, so copy and change fields for SV 4
for SV_idx in range(5, 16+1):
new_fields = [(re.sub(r'4$', str(SV_idx), field[0]), re.sub(r'4$', str(SV_idx), field[1])) for field in fields_to_duplicate]
fields = fields + new_fields
fields = tuple(fields) # Return to original immutable tuple state
return fields
# Create additional sv_id01...sv_id12 fields for the second constellation, and prefix the labels with 'gp_' for GPS and 'gl_' for GLONASS
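# Illustrative example: the sv_id01 field becomes gp_sv_id01 and gl_sv_id01, so each
# constellation's satellites get their own set of 12 columns.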
def expand_GSA_fields(fields):
fields = list(fields) # Make mutable
fields_to_duplicate = [field for field in fields if field[1].startswith('sv_id')]
gp_fields = [('GP ' + field[0], 'gp_' + field[1]) for field in fields_to_duplicate]
gl_fields = [('GL ' + field[0], 'gl_' + field[1]) for field in fields_to_duplicate]
fields = fields[:2] + gp_fields + gl_fields + fields[-3:] # Keep first two fields and last three fields, replacing what is in between
fields = tuple(fields) # Return to original immutable tuple state
return fields
# Do data processing that we will always want to do
def process_data_common(sentences, args):
dts_sentences = datetime_stamp_sentences(sentences, args.cycle_start)
# 'dts' -> 'datetime stamped'
dts_sentences = assign_cycle_ids(dts_sentences, args)
sentence_sets = categorize_sentences(dts_sentences)
sentence_sets = merge_groups(sentence_sets)
sentence_dfs = sentences_to_dataframes(sentence_sets)
return sentence_dfs
def main(passed_args=None):
args = parse_and_validate_args(passed_args)
print("\nReading in data... ", end="")
file = open_file(args.filepath)
sentences = read_file(file)
print("done.")
print("\nProcessing data... ", end="")
sentence_dfs = process_data_common(sentences, args) # Cycle start defaults to 'GNRMC' if --cycle_start is not provided
if args.backfill_datetimes:
backfill_datetimes(sentence_dfs, verbose=True)
derive_data(sentence_dfs)
add_uid_to_dfs(sentence_dfs, args.unique_id)
print("done.")
if (args.output_method == 'csv' or args.output_method == 'both'):
print("\nWriting data to CSVs... ", end="")
dfs_to_csv(sentence_dfs, args.filepath, verbose=True)
print("done.")
if (args.output_method == 'db' or args.output_method == 'both'):
if args.drop_previous_db_tables:
print()
db_utils.drop_db_tables(db_table_lists.nmea_tables, verbose=True)
print("\nWriting data to database... ", end="")
dfs_to_db(sentence_dfs, args.filepath, verbose=True)
print("done.")
print("\nAll done. Exiting.\n\n")
if __name__ == '__main__':
main()