-
Notifications
You must be signed in to change notification settings - Fork 0
/
sonify.py
307 lines (268 loc) · 10.6 KB
/
sonify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
'''
Methods which sonify annotations for "evaluation by ear".
All functions return a raw signal at the specified sampling rate.
'''
import numpy as np
from numpy.lib.stride_tricks import as_strided
from scipy.interpolate import interp1d
from . import util
from . import chord
def clicks(times, fs, click=None, length=None):
"""Returns a signal with the signal 'click' placed at each specified time
Parameters
----------
times : np.ndarray
times to place clicks, in seconds
fs : int
desired sampling rate of the output signal
click : np.ndarray
click signal, defaults to a 1 kHz blip
length : int
desired number of samples in the output signal,
defaults to ``times.max()*fs + click.shape[0] + 1``
Returns
-------
click_signal : np.ndarray
Synthesized click signal
"""
# Create default click signal
if click is None:
# 1 kHz tone, 100ms
click = np.sin(2*np.pi*np.arange(fs*.1)*1000/(1.*fs))
# Exponential decay
click *= np.exp(-np.arange(fs*.1)/(fs*.01))
# Set default length
if length is None:
length = int(times.max()*fs + click.shape[0] + 1)
# Pre-allocate click signal
click_signal = np.zeros(length)
# Place clicks
for time in times:
# Compute the boundaries of the click
start = int(time*fs)
end = start + click.shape[0]
# Make sure we don't try to output past the end of the signal
if start >= length:
break
if end >= length:
click_signal[start:] = click[:length - start]
break
# Normally, just add a click here
click_signal[start:end] = click
return click_signal
def time_frequency(gram, frequencies, times, fs, function=np.sin, length=None,
n_dec=1):
"""Reverse synthesis of a time-frequency representation of a signal
Parameters
----------
gram : np.ndarray
``gram[n, m]`` is the magnitude of ``frequencies[n]``
from ``times[m]`` to ``times[m + 1]``
Non-positive magnitudes are interpreted as silence.
frequencies : np.ndarray
array of size ``gram.shape[0]`` denoting the frequency of
each row of gram
times : np.ndarray, shape= ``(gram.shape[1],)`` or ``(gram.shape[1], 2)``
Either the start time of each column in the gram,
or the time interval corresponding to each column.
fs : int
desired sampling rate of the output signal
function : function
function to use to synthesize notes, should be :math:`2\pi`-periodic
length : int
desired number of samples in the output signal,
defaults to ``times[-1]*fs``
n_dec : int
the number of decimals used to approximate each sonfied frequency.
Defaults to 1 decimal place. Higher precision will be slower.
Returns
-------
output : np.ndarray
synthesized version of the piano roll
"""
# Default value for length
if times.ndim == 1:
# Convert to intervals
times = util.boundaries_to_intervals(times)
if length is None:
length = int(times[-1, 1] * fs)
times, _ = util.adjust_intervals(times, t_max=length)
# Truncate times so that the shape matches gram
n_times = gram.shape[1]
times = times[:n_times]
def _fast_synthesize(frequency):
"""A faster way to synthesize a signal.
Generate one cycle, and simulate arbitrary repetitions
using array indexing tricks.
"""
# hack so that we can ensure an integer number of periods and samples
# rounds frequency to 1st decimal, s.t. 10 * frequency will be an int
frequency = np.round(frequency, n_dec)
# Generate 10*frequency periods at this frequency
# Equivalent to n_samples = int(n_periods * fs / frequency)
# n_periods = 10*frequency is the smallest integer that guarantees
# that n_samples will be an integer, since assuming 10*frequency
# is an integer
n_samples = int(10.0**n_dec * fs)
short_signal = function(2.0 * np.pi * np.arange(n_samples) *
frequency / fs)
# Calculate the number of loops we need to fill the duration
n_repeats = int(np.ceil(length/float(short_signal.shape[0])))
# Simulate tiling the short buffer by using stride tricks
long_signal = as_strided(short_signal,
shape=(n_repeats, len(short_signal)),
strides=(0, short_signal.itemsize))
# Use a flatiter to simulate a long 1D buffer
return long_signal.flat
def _const_interpolator(value):
"""Return a function that returns `value`
no matter the input.
"""
def __interpolator(x):
return value
return __interpolator
# Threshold the tfgram to remove non-positive values
gram = np.maximum(gram, 0)
# Pre-allocate output signal
output = np.zeros(length)
time_centers = np.mean(times, axis=1) * float(fs)
for n, frequency in enumerate(frequencies):
# Get a waveform of length samples at this frequency
wave = _fast_synthesize(frequency)
# Interpolate the values in gram over the time grid
if len(time_centers) > 1:
gram_interpolator = interp1d(
time_centers, gram[n, :],
kind='linear', bounds_error=False,
fill_value=(gram[n, 0], gram[n, -1]))
# If only one time point, create constant interpolator
else:
gram_interpolator = _const_interpolator(gram[n, 0])
# Scale each time interval by the piano roll magnitude
for m, (start, end) in enumerate((times * fs).astype(int)):
# Clip the timings to make sure the indices are valid
start, end = max(start, 0), min(end, length)
# add to waveform
output[start:end] += (
wave[start:end] * gram_interpolator(np.arange(start, end)))
# Normalize, but only if there's non-zero values
norm = np.abs(output).max()
if norm >= np.finfo(output.dtype).tiny:
output /= norm
return output
def pitch_contour(times, frequencies, fs, amplitudes=None, function=np.sin,
length=None, kind='linear'):
'''Sonify a pitch contour.
Parameters
----------
times : np.ndarray
time indices for each frequency measurement, in seconds
frequencies : np.ndarray
frequency measurements, in Hz.
Non-positive measurements will be interpreted as un-voiced samples.
fs : int
desired sampling rate of the output signal
amplitudes : np.ndarray
amplitude measurments, nonnegative
defaults to ``np.ones((length,))``
function : function
function to use to synthesize notes, should be :math:`2\pi`-periodic
length : int
desired number of samples in the output signal,
defaults to ``max(times)*fs``
kind : str
Interpolation mode for the frequency and amplitude values.
See: ``scipy.interpolate.interp1d`` for valid settings.
Returns
-------
output : np.ndarray
synthesized version of the pitch contour
'''
fs = float(fs)
if length is None:
length = int(times.max() * fs)
# Squash the negative frequencies.
# wave(0) = 0, so clipping here will un-voice the corresponding instants
frequencies = np.maximum(frequencies, 0.0)
# Build a frequency interpolator
f_interp = interp1d(times * fs, 2 * np.pi * frequencies / fs, kind=kind,
fill_value=0.0, bounds_error=False, copy=False)
# Estimate frequency at sample points
f_est = f_interp(np.arange(length))
if amplitudes is None:
a_est = np.ones((length, ))
else:
# build an amplitude interpolator
a_interp = interp1d(
times * fs, amplitudes, kind=kind,
fill_value=0.0, bounds_error=False, copy=False)
a_est = a_interp(np.arange(length))
# Sonify the waveform
return a_est * function(np.cumsum(f_est))
def chroma(chromagram, times, fs, **kwargs):
"""Reverse synthesis of a chromagram (semitone matrix)
Parameters
----------
chromagram : np.ndarray, shape=(12, times.shape[0])
Chromagram matrix, where each row represents a semitone [C->Bb]
i.e., ``chromagram[3, j]`` is the magnitude of D# from ``times[j]`` to
``times[j + 1]``
times: np.ndarray, shape=(len(chord_labels),) or (len(chord_labels), 2)
Either the start time of each column in the chromagram,
or the time interval corresponding to each column.
fs : int
Sampling rate to synthesize audio data at
kwargs
Additional keyword arguments to pass to
:func:`mir_eval.sonify.time_frequency`
Returns
-------
output : np.ndarray
Synthesized chromagram
"""
# We'll just use time_frequency with a Shepard tone-gram
# To create the Shepard tone-gram, we copy the chromagram across 7 octaves
n_octaves = 7
# starting from C2
base_note = 24
# and weight each octave by a normal distribution
# The normal distribution has mean 72 (one octave above middle C)
# and std 6 (one half octave)
mean = 72
std = 6
notes = np.arange(12*n_octaves) + base_note
shepard_weight = np.exp(-(notes - mean)**2./(2.*std**2.))
# Copy the chromagram matrix vertically n_octaves times
gram = np.tile(chromagram.T, n_octaves).T
# This fixes issues if the supplied chromagram is int type
gram = gram.astype(float)
# Apply Sheppard weighting
gram *= shepard_weight.reshape(-1, 1)
# Compute frequencies
frequencies = 440.0*(2.0**((notes - 69)/12.0))
return time_frequency(gram, frequencies, times, fs, **kwargs)
def chords(chord_labels, intervals, fs, **kwargs):
"""Synthesizes chord labels
Parameters
----------
chord_labels : list of str
List of chord label strings.
intervals : np.ndarray, shape=(len(chord_labels), 2)
Start and end times of each chord label
fs : int
Sampling rate to synthesize at
kwargs
Additional keyword arguments to pass to
:func:`mir_eval.sonify.time_frequency`
Returns
-------
output : np.ndarray
Synthesized chord labels
"""
util.validate_intervals(intervals)
# Convert from labels to chroma
roots, interval_bitmaps, _ = chord.encode_many(chord_labels)
chromagram = np.array([np.roll(interval_bitmap, root)
for (interval_bitmap, root)
in zip(interval_bitmaps, roots)]).T
return chroma(chromagram, intervals, fs, **kwargs)