-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcallsign_regex.py
executable file
·135 lines (124 loc) · 5.48 KB
/
callsign_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3
#
# Parses prefixes and regular expressions from the ITU callsign database at
# https://www.itu.int/en/ITU-R/terrestrial/fmd/Pages/call_sign_series.aspx
#
# This follows the format described in https://en.wikipedia.org/wiki/Amateur_radio_call_signs;
# however, it does not support several very special cases like "D9K", "C6A*" or "H2T".
import collections
import json
import openpyxl
import os
import re
import string
import sys
OUTPUT_DIR = 'generated/'


def write_utf8_file(fn, string):
    """Write *string* to the file *fn* below OUTPUT_DIR, encoded as UTF-8.

    OUTPUT_DIR is created first if it does not exist yet. An existing file
    of the same name is overwritten.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # the context manager flushes and closes the file even if write() raises,
    # instead of leaving the handle open until garbage collection
    with open(OUTPUT_DIR + fn, 'wt', encoding='utf-8') as f:
        f.write(string)
# Extract the following info of a group from an excel row:
#
# - is_full = contains all 26 characters of the group, e.g. KAA-KAZ
# - prefix = common prefix for full group, first prefix of non-full (first and last letters will be added)
# - regex = string of the (sub)regex for this group
# - country = country that got the group allocation
def analyze_row(row):
    """Turn one (series, country) pair of cells into a group dict.

    *row* is a pair of cell-like objects exposing ``.value``: the first holds
    a series such as "KAA - KAZ", the second the country it is allocated to.

    Returns a dict with the keys 'is_full', 'country', 'regex' and 'prefix'.
    Series that do not span A-Z in the third character additionally carry
    'first' and 'last' (the third characters of both ends).

    Raises ValueError if the series does not split into exactly two parts on
    " - ", if either part is not 3 characters long, or if the two parts differ
    in their first two characters.
    """
    prefix, country = row
    series = prefix.value
    first, last = series.split(" - ")
    group = {'is_full': False, 'country': country.value, 'regex': None}
    # report the cell's text in errors, not the repr of the cell object
    if len(first) != 3 or len(last) != 3:
        raise ValueError("Prefixes must be 3 characters: '%s'" % series)
    if first[:2] != last[:2]:
        raise ValueError("Prefixes must start with the same characters: '%s'" % series)
    stem = first[:2]
    if first[2] == 'A' and last[2] == 'Z':
        group['is_full'] = True
        group['prefix'] = stem
        group['regex'] = stem + '[A-Z]?'  # third letter is optional in full groups
    else:
        # 'is_full' already defaults to False above
        group['prefix'] = first
        group['first'] = first[2]
        group['last'] = last[2]
        group['regex'] = stem + '[%s-%s]' % (first[2], last[2])
    return group
def prefix_list(group):
    """Return the list of prefixes covered by *group*.

    A full group is represented by its single two-character prefix. Any other
    group is expanded into one three-character prefix for every third
    character from group['first'] through group['last'], inclusive.
    """
    if group['is_full']:
        return [group['prefix']]
    lo, hi = ord(group['first']), ord(group['last'])
    return [group['prefix'][:2] + chr(code) for code in range(lo, hi + 1)]
def load_xls(fn):
    """Load the call sign series workbook *fn* and derive prefix data from it.

    The first worksheet must carry the header cells "Series" (A1) and
    "Allocated to" (B1); every following row is parsed with analyze_row().

    Side effects: writes 'callsigns.regex', 'prefixes.dense.json',
    'prefixes.pretty.json', 'countries.dense.json' and 'countries.pretty.json'
    through write_utf8_file().

    Returns a tuple (regex, prefixes): the compiled regular expression that
    matches a call sign and captures its prefix part, and the dict mapping
    each prefix to its group data.

    Raises ValueError if the header cells do not match; errors raised by
    analyze_row() for malformed rows propagate unchanged.
    """
    cssr = openpyxl.load_workbook(fn, data_only=True)
    sheet = cssr.worksheets[0]
    prefixes = {}
    countries = collections.defaultdict(set)
    letters = collections.Counter()

    if sheet['A1'].value != "Series" or sheet['B1'].value != "Allocated to":
        raise ValueError("Excel header mismatch")

    # load individual rows from the table and populate:
    # - prefixes (a dict of prefix->data)
    # - countries (a dict of country->set of prefixes)
    # - letters (count of rows per first character of the prefix)
    for row in sheet['A2':'B%d' % sheet.max_row]:
        # a completely blank row carries no allocation; skip it rather than
        # failing inside analyze_row() on a None cell value
        if all(cell.value is None for cell in row):
            continue
        group = analyze_row(row)
        group_prefixes = prefix_list(group)
        countries[group['country']].update(group_prefixes)
        for prefix in group_prefixes:
            prefixes[prefix] = group
        letters[group['prefix'][0]] += 1
        # 'prefix' was only needed for the bookkeeping above; the dict key
        # carries it from here on, so keep it out of the exported data
        del group['prefix']

    # special-case for single-letter allocations: check through the alphabet
    for first in string.ascii_uppercase:
        ref = prefixes.get(first + 'A')
        if letters[first] != 26 or ref is None or not ref['is_full']:
            continue
        # every other two-character prefix of this letter must exist, belong
        # to the same country and be a full group as well
        is_single_letter_prefix = all(
            first + second in prefixes
            and prefixes[first + second]['country'] == ref['country']
            and prefixes[first + second]['is_full']
            for second in string.ascii_uppercase[1:]
        )
        if not is_single_letter_prefix:
            continue
        ref['regex'] = first + '[A-Z]{0,2}'  # second and third are optional
        country = ref['country']
        # replace the 26 two-character entries by one single-letter entry
        for second in string.ascii_uppercase:
            del prefixes[first + second]
            countries[country].remove(first + second)
        prefixes[first] = ref
        countries[country].add(first)

    # distinct sub-regexes of all groups, used as alternatives for later matching
    regexes = {group['regex'] for group in prefixes.values()}

    # create a regex that matches all valid callsigns and captures the country prefix
    regex_str = "(%s)[0-9][0-9A-Z]{0,3}[A-Z]" % "|".join(sorted(regexes))
    regex = re.compile(regex_str)
    write_utf8_file('callsigns.regex', regex_str + '\n')

    # store prefix map
    write_utf8_file('prefixes.dense.json', json.dumps(prefixes, ensure_ascii=False))
    write_utf8_file('prefixes.pretty.json', json.dumps(prefixes, indent=4, ensure_ascii=False))

    # convert sets to lists for JSON export and export per-country prefix lists
    countries = {key: sorted(val) for key, val in countries.items()}
    write_utf8_file('countries.dense.json', json.dumps(countries, ensure_ascii=False))
    write_utf8_file('countries.pretty.json', json.dumps(countries, indent=4, ensure_ascii=False))

    return regex, prefixes
# first argument: path of the workbook to load (this also writes the output files)
regex, prefixes = load_xls(sys.argv[1])

# every further argument is a callsign to check against the generated regex
for call in map(str.upper, sys.argv[2:]):
    found = regex.match(call)
    # start from the captured prefix part (nothing if the regex did not match)
    # and shorten it until it is a known prefix or nothing is left
    candidate = found.group(1) if found else ''
    while candidate and candidate not in prefixes:
        candidate = candidate[:-1]
    if candidate in prefixes:
        print("%s - %s" % (call, prefixes[candidate]['country']))
    else:
        print("%s does not match" % call)