forked from jwdj/EasyABC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
aligner.py
335 lines (297 loc) · 13.3 KB
/
aligner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
import re
import string
import os
from fractions import Fraction
import sys
if sys.version_info.major > 2:
    # Python 3 has no ``basestring``; alias it so the isinstance() check in
    # get_bar_length keeps working on both major versions.
    basestring = str


def _compile_separator_regex(symbols, padding=''):
    """Compile one capturing alternation over the given separator symbols.

    Each symbol is regex-escaped and wrapped in ``padding`` on both sides.
    The alternatives keep the order of ``symbols``; that order matters because
    the regex engine takes the first alternative that matches.
    """
    alternatives = [padding + re.escape(symbol) + padding for symbol in symbols]
    return re.compile('(%s)' % '|'.join(alternatives))


# All recognised bar/repeat separators, listed so that longer symbols come
# before the shorter symbols they contain (the bare '|' is last).
bar_sep_symbols = ':|][|: :|[2 :|]2 :||: [|] :|] [|: :|| ||: :|: |:: ::| |[1 :|2 |] || [| :: .| |1 |: :| [1 [2 |'.split()

# A separator together with any spaces around it.
bar_sep = _compile_separator_regex(bar_sep_symbols, ' *')
# A separator only, surrounding spaces are not consumed.
bar_sep_without_space = _compile_separator_regex(bar_sep_symbols)
# Like bar_sep, but '&' (voice overlay) also counts as a separator.
bar_and_voice_overlay_sep = _compile_separator_regex(bar_sep_symbols + ['&'], ' *')
def get_default_len(abc):
    """Return the default note length declared by the first ``L:`` field.

    Returns the string ``'mcm_default'`` when a line starts with
    ``L: mcm_default``, otherwise a ``Fraction`` built from the first
    ``L:<num>/<den>`` line. When there is no such line, or its denominator is
    zero, ``Fraction(1, 8)`` is returned.
    """
    if re.search(r'(?m)^L: *mcm_default', abc):
        return 'mcm_default'
    m = re.search(r'(?m)^L: *(\d+)/(\d+)', abc)
    if m:
        numerator, denominator = int(m.group(1)), int(m.group(2))
        # A malformed "L:1/0" would make Fraction raise ZeroDivisionError;
        # treat it like a missing field instead.
        if denominator:
            return Fraction(numerator, denominator)
    return Fraction(1, 8)
def get_metre(abc):
    """Return the metre declared by the first ``M:<num>/<den>`` line.

    The result is a ``Fraction`` (so 6/8 is returned as ``Fraction(3, 4)``).
    When there is no numeric ``M:`` line, or its denominator is zero,
    ``Fraction(4, 4)`` is returned.
    """
    m = re.search(r'(?m)^M: *(\d+)/(\d+)', abc)
    if m:
        numerator, denominator = int(m.group(1)), int(m.group(2))
        # A malformed "M:4/0" would make Fraction raise ZeroDivisionError;
        # treat it like a missing field instead.
        if denominator:
            return Fraction(numerator, denominator)
    return Fraction(4, 4)
def get_key(abc):
    """Return the first word of the first ``K:`` field, or ``'C'`` if none.

    The word starts with a word character and may contain further word
    characters and ``#``, so a sharp key such as ``K:F#m`` is returned as
    ``'F#m'`` rather than being cut off at the ``#``.
    """
    m = re.search(r'(?m)^K: *(\w[\w#]*)', abc)
    if m:
        return m.group(1)
    return 'C'
def remove_non_note_fragments(abc):
    """Return ``abc`` with everything that is not note material deleted.

    The fragments are deleted outright (replaced by the empty string), so
    character offsets into the original text are NOT preserved. Bar and
    repeat symbols are left in place. The patterns are applied in the order
    listed because earlier removals affect what later patterns see.
    """
    removal_patterns = (
        r'(?m)%.*$',      # comments
        r'\[\w:.*?\]',    # embedded fields
        r'\\"',           # escaped " characters
        r'".*?"',         # strings
        r'\{.*?\}',       # grace notes
        r'!.+?!',         # ornaments like eg. !pralltriller!
        r'\+.+?\+',       # ornaments like eg. +pralltriller+
    )
    for pattern in removal_patterns:
        abc = re.sub(pattern, '', abc)
    return abc
def replace_chords_by_first_note(abc):
    """Collapse each bracketed chord to its first note.

    For example "[AD]2 [B2C2e2]" becomes "A2 B2". The text is first passed
    through remove_non_note_fragments. A bracketed span that contains no note
    at all is removed entirely.
    """
    note_pattern = r"(?P<note>([_=^]?[A-Ga-gxz](,+|'+)?))(?P<length>\d{0,2}/\d{1,2}|/+|\d{0,2})(?P<broken>[><]?)"

    def first_note_of(chord_match):
        # Only the first note (with its length and broken-rhythm mark)
        # found inside the brackets survives.
        found = re.search(note_pattern, chord_match.group(0))
        return found.group(0) if found else ''

    cleaned = remove_non_note_fragments(abc)
    return re.sub(r'\[.*?\]', first_note_of, cleaned)
def get_bar_length(abc, default_length, metre):
    """Return the total duration of the notes and rests in ``abc``.

    Parameters:
        abc: ABC text, normally a single bar.
        default_length: the unit note length as a ``Fraction``, or the string
            ``'mcm_default'``.
        metre: the tune's metre as a ``Fraction``; only its numerator is used,
            to pick the default tuplet time for 5-, 7- and 9-tuplets.

    Returns:
        A ``Fraction`` with the summed length.

    Non-note fragments are stripped and chords are reduced to their first
    note before counting. Broken rhythm (``>`` / ``<``) and tuplets
    ``(p:q:r`` are taken into account.
    """
    abc = remove_non_note_fragments(abc)
    abc = replace_chords_by_first_note(abc)

    # 1.3.6.5 [JWDJ] 2015-12-19 not only triplet support but also handling of other tuplets
    note_pattern = r"(?P<note>([_=^]?[A-Ga-gxz](,+|'+)?))(?P<length>\d{0,3}(?:/\d{0,3})*)(?P<dot>\.*)(?P<broken>[><]?)"
    tuplet_pattern = r"\((?P<p>[1-9])(?:\:(?P<q>[1-9]?))?(?:\:(?P<r>[1-9]?))?"  # put p notes into the time of q for the next r notes

    total_length = Fraction(0)
    last_broken_rythm = ''
    tuplet_notes = 0       # p of the current tuplet (only read while tuplet_notes_left > 0)
    tuplet_notes_left = 0  # how many notes in the current tuplet are we yet to see
    tuplet_time = 2
    is_mcm = isinstance(default_length, basestring) and default_length == 'mcm_default'

    for match in re.finditer(r'(%s)|(%s)' % (note_pattern, tuplet_pattern), abc):
        if match.group(0)[0] == '(':
            # Tuplet marker: remember its parameters for the following notes.
            tuplet_notes = int(match.group('p'))
            explicit_time = match.group('q')
            if explicit_time:
                tuplet_time = int(explicit_time)
            elif tuplet_notes in (3, 6):
                tuplet_time = 2
            elif tuplet_notes in (2, 4, 8):
                tuplet_time = 3
            elif metre.numerator % 3 == 0:
                tuplet_time = 3  # for compound meter 6/8, 9/8, 12/8, etc
            else:
                tuplet_time = 2
            # The note count is the r group of (p:q:r (it defaults to p).
            # Reading it from q, as was done before, made e.g. "(3:2:4" apply
            # to two notes instead of four.
            explicit_count = match.group('r')
            tuplet_notes_left = int(explicit_count) if explicit_count else tuplet_notes
            continue

        length = match.group('length')
        if is_mcm:
            # In this mode the number after the note is used as a denominator.
            # NOTE(review): a note without an explicit number makes int('')
            # raise ValueError here - confirm what length such a note should get.
            length = length.split('/')[0]  # ignore any fraction
            multiplier = Fraction(1, int(length))
            for _ in match.group('dot'):
                multiplier = multiplier * Fraction(3, 2)
            total_length = total_length + multiplier
            continue

        multiplier = Fraction(1)
        broken_rythm = match.group('broken')
        # A note before '>' or after '<' is lengthened; the other one of the
        # pair is shortened.
        if broken_rythm == '>' or last_broken_rythm == '<':
            multiplier = Fraction(3, 2)
        elif broken_rythm == '<' or last_broken_rythm == '>':
            multiplier = Fraction(1, 2)
        last_broken_rythm = broken_rythm

        # 1.3.6.5 [JWDJ] 2015-12-19 divisor parsed similar to abcm2ps
        dividend = length.split('/')[0]
        if dividend:
            multiplier = multiplier * Fraction(int(dividend))
        for divmatch in re.finditer(r'/(\d*)', length):
            divisor = divmatch.group(1)
            # A bare '/' halves the note.
            divisor = int(divisor) if divisor else 2
            multiplier = multiplier / Fraction(divisor)

        if tuplet_notes_left:
            multiplier = multiplier * Fraction(tuplet_time, tuplet_notes)
            tuplet_notes_left -= 1

        total_length = total_length + multiplier * default_length

    return total_length
def is_likely_anacrusis(bar, default_length, metre):
    """Tell whether ``bar`` looks like a pickup (incomplete) bar.

    Returns False for an empty string and for text that starts with a bar
    separator. Otherwise returns True when the bar's length is at most 80% of
    the length of a full bar as given by ``metre``.
    """
    if bar and not bar_sep.match(bar):
        bar_length = get_bar_length(bar, default_length, metre)
        full_bar_length = metre
        return float(bar_length) <= float(full_bar_length)*0.8
    return False
def align_beams(bars):
    """Pad the space-separated groups of several bars to common widths.

    Each bar is split on runs of spaces. For every group index that exists in
    all bars, the groups are left-justified to the width of the longest one.
    The groups of each bar are then re-joined with single spaces.

    Parameters:
        bars: list of strings, one bar per line to align.

    Returns:
        A new list of strings in the same order. An empty input yields an
        empty list (previously ``min()`` raised ValueError).
    """
    if not bars:
        return []
    bar_parts = [re.split(' +', bar) for bar in bars]
    # Only the leading groups that every bar has can be aligned.
    num_parts = min(len(parts) for parts in bar_parts)
    for i in range(num_parts):
        width = max(len(parts[i]) for parts in bar_parts)
        for parts in bar_parts:
            parts[i] = parts[i].ljust(width)
    return [' '.join(parts) for parts in bar_parts]
def align_bars(bars, align_inside_bars_too=True):
    """Pad the given strings (one per line) to a common width.

    If the first element starts with a bar separator, every element is
    stripped and wrapped in single spaces. Otherwise, when
    ``align_inside_bars_too`` is set, the groups inside the bars are aligned
    with align_beams first. Finally all elements are left-justified to the
    longest one.
    """
    first_is_separator = bar_sep.match(bars[0])
    if first_is_separator:
        bars = [' %s ' % bar.strip() for bar in bars]
    elif align_inside_bars_too:
        bars = align_beams(bars)
    width = len(max(bars, key=len))
    return [bar.ljust(width) for bar in bars]
def align_bar_separators(bar_seps):
    """Pad bar separators (one per line) so that their '|' line up.

    Every element is stripped and wrapped in single spaces. If any element
    contains '|', elements whose last '|' is further left than the rightmost
    one get leading spaces so that the last '|' of each ends up in the same
    column. Finally all elements are left-justified to a common width.

    Parameters:
        bar_seps: list of strings; elements may be empty.

    Returns:
        A new list of equally long strings; an empty input yields ``[]``.
    """
    # The previous implementation picked string.rjust / string.ljust up front.
    # Those module-level functions do not exist on Python 3, so the function
    # raised AttributeError there. The right-justify choice was also never
    # used: it required ':|' in some element, which implies '|' is present,
    # and that path always left-justified.
    if not bar_seps:
        return []
    bar_seps = [' %s ' % bs.strip() for bs in bar_seps]
    if any('|' in bs for bs in bar_seps):
        # try to center around the last occurrence of '|'
        max_pos_pipe = max(bs.rfind('|') for bs in bar_seps)
        for i, bs in enumerate(bar_seps):
            p = bs.rfind('|')
            # p == -1 means the element has no '|' and is left alone.
            if 0 <= p < max_pos_pipe:
                bar_seps[i] = (' ' * (max_pos_pipe - p)) + bs
    max_len = max(len(bs) for bs in bar_seps)
    return [bs.ljust(max_len) for bs in bar_seps]
def split_line_into_parts(line):
    """Split ``line`` at bar separators and return the non-empty pieces.

    Because bar_sep has a capturing group, the separators themselves (with
    their surrounding spaces) are kept as list elements between the bars.
    """
    return [piece for piece in bar_sep.split(line) if piece]
def align_lines(whole_abc, lines, align_inside_bars_too=False):
    """Align the bars and bar separators of several lines of ABC.

    Parameters:
        whole_abc: text searched for the L: and M: fields, which are needed to
            detect a pickup bar.
        lines: list of strings, the lines to align with each other.
        align_inside_bars_too: when true, groups inside each bar are aligned
            as well (passed on to align_bars).

    Returns:
        A new list with one padded string per input line. If every result
        starts with a space, one leading space is removed from each. An empty
        ``lines`` yields ``[]`` (previously ``max()`` raised ValueError).
    """
    if not lines:
        return []

    line_parts = [split_line_into_parts(line.strip()) for line in lines]

    # Pad every line with '' so that all have one element more than the
    # longest line.
    num_bars = max(len(lp) for lp in line_parts) + 1
    for lp in line_parts:
        lp.extend([''] * (num_bars - len(lp)))

    default_len = get_default_len(whole_abc)
    metre = get_metre(whole_abc)
    first_bar_handled = False
    # NOTE(review): the column count is fixed here although the loop inserts
    # '' elements, so elements pushed beyond num_bars are joined unaligned.
    for i in range(num_bars):
        # if the first bar with notes hasn't been handled yet, check if we're currently seeing any anacrusis
        if not first_bar_handled and any(re.search(r'[a-gA-Gxz]', lp[i]) for lp in line_parts):
            first_bar_handled = True
            is_anacrusis = [is_likely_anacrusis(lp[i], default_len, metre) for lp in line_parts]
            # if at least one bar is an anacrusis and not all, then add '' for the bars that aren't
            if any(is_anacrusis) and not all(is_anacrusis):
                for lp, is_ana in zip(line_parts, is_anacrusis):
                    if not is_ana:
                        lp.insert(i, '')

        # if some element is a bar separator and others aren't, then add a kind of
        # pseudo element ('') in order for the alignment to work
        any_is_bar_sep = any(bar_sep.match(lp[i]) for lp in line_parts)
        if any_is_bar_sep:
            for lp in line_parts:
                if not bar_sep.match(lp[i]):
                    lp.insert(i, '')

        column = [lp[i] for lp in line_parts]
        if any_is_bar_sep:
            column = align_bar_separators(column)
        else:
            column = align_bars(column, align_inside_bars_too=align_inside_bars_too)
        for lp, aligned in zip(line_parts, column):
            lp[i] = aligned

    lines = [''.join(lp) for lp in line_parts]
    if all(line.startswith(' ') for line in lines):
        lines = [line[1:] for line in lines]
    return lines
def extract_incipit(abc, num_bars=2, num_repeats=999):
    """Return the opening bars of each repeat/part of a tune.

    Parameters:
        abc: the complete ABC text of one tune (header and body).
        num_bars: number of full bars to keep per part; a leading pickup bar
            is kept in addition and not counted.
        num_repeats: maximum number of parts to return.

    Returns:
        A list of strings, one per non-empty part, each holding the bars and
        bar separators joined by single spaces. A part that begins in a key
        other than the previous part's starts with an embedded ``[K:...]``
        field.
    """

    def extract_bars(parts, num_bars, default_len, metre):
        # Collect parts until num_bars bars containing notes have been seen,
        # stopping at the first non-bar element after that.
        result = []
        bar_count = 0
        first_bar_handled = False
        for part in parts:
            result.append(part)
            if re.search(r'[a-gA-Gxz]', part) and not re.match(r'\s*\[\w:.*?\]\s*', part):
                if first_bar_handled:
                    bar_count += 1
                else:
                    first_bar_handled = True
                    # A pickup bar does not count towards num_bars.
                    if is_likely_anacrusis(part, default_len, metre):
                        bar_count = 0
                    else:
                        bar_count = 1
            elif num_bars == bar_count:
                break
        return (' '.join(result)).strip()

    # split tune header and body (and remove non-embedded fields in the body,
    # except for K: which is made embedded)
    lines = re.split('\r\n|\r|\n', abc)
    lines = [re.sub(r'\s*%.*$', '', line) for line in lines]  # remove comments
    header = []
    body = []
    in_body = False
    for line in lines:
        is_field = re.match(r'[a-zA-Z]:', line)
        if in_body:
            if is_field:
                if not line.startswith('K:'):
                    continue  # ignore fields in the tune body
                line = '[%s]' % line  # convert K: field to [K:]
            body.append(line)
        elif is_field:
            header.append(line)
            # The K: field is the last header line; the body follows it.
            if line.startswith('K:'):
                in_body = True
    abc, header = ' '.join(body), os.linesep.join(header)

    default_len = get_default_len(header)
    metre = get_metre(header)
    key = get_key(header)

    parts = split_line_into_parts(abc.strip())
    inside_repeat_ending = False
    sections = [[]]  # list of parts (bar seps and bars) for each repeat/ending
    # Use the same "[K:...]" form as the embedded fields found below, so that
    # a body field restating the header key is not taken for a key change
    # (the former "[<key>]" form could never equal an embedded field).
    key_at_start = '[K:%s]' % key
    last_seen_key = key_at_start
    for part in parts:
        sections[-1].append(part)
        m = re.search(r'\[K:.*?\]', part)
        if m:
            last_seen_key = m.group(0)
        if not bar_sep.match(part):
            continue
        if re.search(r'\|\d', part):
            inside_repeat_ending = True
        if inside_repeat_ending and ('||' in part or '|]' in part):
            inside_repeat_ending = False
            sections.append([])  # start new repeat/part
            if last_seen_key != key_at_start:
                sections[-1].append(last_seen_key)
                key_at_start = last_seen_key
        elif '|:' in part:
            # The start-repeat sign belongs to the new part: take it back out
            # of the current one and re-append it below.
            sections[-1].pop()
            sections.append([])  # start new repeat/part
            if last_seen_key != key_at_start:
                sections[-1].append(last_seen_key)
                key_at_start = last_seen_key
            # if last bar was anacrusis, move it over to the current repeat/part
            if len(sections) > 1 and len(sections[-2]) >= 1 and is_likely_anacrusis(sections[-2][-1], default_len, metre):
                sections[-1].append(sections[-2][-1])
                del sections[-2][-1]
            sections[-1].append(part)

    incipits = [extract_bars(section, num_bars, default_len, metre) for section in sections]
    incipits = [x.strip() for x in incipits if x.strip()]
    return incipits[:num_repeats]
if __name__ == "__main__":
    # Manual smoke test: print the first two bars of the first two parts of a
    # sample tune that has a pickup bar, repeats and a key change in the body.
    sample_tune = '''X:32
T:Kadrilj2
M:2/4
L:1/16
K:F
u(AB)|(cd)cB ABcd|c2f2 a4|(ag)(gf) (fe)(ed)|
d6 c2|(Bc)BA (GA)Bc|d2g2 b4|(ba)(ag) (gf)eg|f4 z2:|
K:Bb
|:F2|F4 d2Bc|d2FA B2B,C|(DB,)B,B, (DB,)B,B,|F2F2 F4|
e4 g2cd|e2AB c2FG|({B}(A)F)FF (AF)FF|(dB)BB B2:| '''
    print(extract_incipit(sample_tune, num_repeats=2, num_bars=2))