-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtranscribe2.py
124 lines (101 loc) · 3.94 KB
/
transcribe2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#%pylab inline
#pylab.rcParams['figure.figsize'] = 26, 5
from scipy.io import wavfile
from scipy.fftpack import fft
from scipy.optimize import lsq_linear
import numpy as np
import pylab
import sys, os, subprocess
import random
from pyknon.genmidi import Midi
from pyknon.music import NoteSeq, Note
# --- learning ---
# Values handed to pyknon's Note(): the 70 integers -30 .. 39 inclusive
# (np.arange excludes its upper bound). The original comment read
# "notes from F#2 to E8".
# NOTE(review): with 40 excluded the top note is presumably one semitone
# below E8 - confirm against pyknon's numbering.
classes = np.arange(-30, 40)
n_classes = len(classes)
tempo = 180  # tempo of file for sample notes. fps / tempo gives how much of the sample we analyze
fps = 6  # frequency of the chords we recognize
# Size of a part we analyze, in samples. 44100 is presumably the sample rate
# in Hz (it matches the "-ar 44100" passed to ffmpeg/avconv in read_mp3).
# Floor division keeps this an integer: it is used for slice bounds and as a
# divisor for range(), which reject floats.
part_length = 44100 // fps
input_length = 600  # number of amplitudes of spectrogram we analyze
suppress_noise = 10000  # for nice printing
# Number of analysed parts between two consecutive sample chords (chords are
# written 5 beats apart in clean_freq). Used as a slice step, so it must be
# an integer as well.
timespan = 60 * 5 * fps // tempo
out_tempo = fps * 60
minimal_volume = 0.01  # output volume threshold
# --- testing ---
poly = 0  # size of chord to test recognition on
n_samples = 50  # number of tests
# TODO: polishing
def read_mp3(filename):
    """Read an audio file, converting ``.mp3`` input to ``temp.wav`` first.

    For a name ending in ``.mp3`` the converters mpg123, ffmpeg, avconv and
    mpg321 are tried in that order until one exits with status 0; the result
    is written to ``temp.wav`` in the current directory. Any other file name
    is passed to ``scipy.io.wavfile.read`` unchanged.

    Args:
        filename: path of the audio file.

    Returns:
        Whatever ``wavfile.read`` returns (a ``(rate, data)`` pair).

    Raises:
        SystemExit: if no converter succeeds.
    """
    if filename.endswith('.mp3'):
        pcm_args = ['-vn', '-acodec', 'pcm_s16le', '-ac', '1',
                    '-ar', '44100', '-f', 'wav', 'temp.wav']
        converters = [
            ['mpg123', '-w', 'temp.wav', filename],
            ['ffmpeg', '-i', filename] + pcm_args,
            ['avconv', '-i', filename] + pcm_args,
            ['mpg321', '-w', 'temp.wav', filename],
        ]
        for command in converters:
            # Argument lists (no shell) keep file names with spaces or shell
            # metacharacters intact.
            try:
                rc = subprocess.call(command)
            except OSError:
                # This converter is not installed; try the next one.
                continue
            if rc == 0:
                break
        else:
            sys.exit('unable to convert mp3 to wav. install either ffmpeg or avconv or mpg123 or mpg321.')
        filename = "temp.wav"
    return wavfile.read(filename)
def channel_freqs(channel1, part_length=part_length, input_length=input_length):
    """Split a signal into consecutive parts and return their FFT magnitudes.

    The signal is cut into ``len(channel1) // part_length`` non-overlapping
    parts; trailing samples that do not fill a whole part are ignored. For
    each part the absolute value of its FFT is taken and truncated to the
    first ``input_length`` bins. The result is also drawn with
    ``pylab.imshow`` (the figure is not shown here).

    Args:
        channel1: one-dimensional sequence of samples.
        part_length: number of samples per part.
        input_length: number of leading FFT bins kept per part.

    Returns:
        Array with one row per part.
    """
    # Slice bounds and range() need integers; the module-level default may be
    # a float when it was computed with true division.
    part_length = int(part_length)
    parts = len(channel1) // part_length
    freqs = np.array([
        abs(fft(channel1[i * part_length:(i + 1) * part_length]))[:input_length]
        for i in range(parts)
    ])
    # NOTE(review): the 'spectral' colormap name may not exist in newer
    # matplotlib releases - confirm against the installed version.
    pylab.imshow(freqs.T, extent=(0, parts, input_length, 0), cmap='spectral')
    #pylab.show()
    return freqs
def random_samples(sample_size):
    """Draw ``sample_size`` random chords as lists of indices into ``classes``.

    Each chord holds ``poly`` distinct indices taken from
    ``range(n_classes)``.
    """
    drawn = []
    for _ in range(sample_size):
        candidates = range(n_classes)
        # Kept as a choice over a one-element list so the random generator is
        # consumed exactly as before.
        chord_size = random.choice([poly])
        drawn.append(random.sample(candidates, chord_size))
    return np.array(drawn)
def clean_freq(samples):
    """Synthesize the given chords and return one spectrum per chord.

    Every entry of ``samples`` is a sequence of indices into ``classes``.
    The chords are written 5 beats apart to ``temp.mid``, rendered to
    ``temp.wav`` with the external ``timidity`` program, and the first
    channel of the result is analysed with ``channel_freqs``. One analysed
    part is kept per chord (every ``timespan``-th part).

    Args:
        samples: sequence of chords, each a sequence of indices into
            ``classes``.

    Returns:
        Integer array with one row per chord: the spectrum magnitudes
        floor-divided by ``suppress_noise``.
    """
    sample_size = len(samples)
    chords = [NoteSeq([Note(classes[i]) for i in sample]) for sample in samples]
    midi = Midi(1, tempo=tempo)
    for i, chord in enumerate(chords):
        midi.seq_chords([chord], time=5 * i)
    midi.write("temp.mid")
    # Discard timidity's output. Passing stdout=PIPE to subprocess.call can
    # block the child once the pipe buffer fills, because nothing reads it.
    with open(os.devnull, 'w') as devnull:
        subprocess.call("timidity temp.mid -Ow -o temp.wav".split(), stdout=devnull)
    # assumes the rendered file has at least two dimensions (frames x
    # channels), so .T[0] selects the first channel - TODO confirm
    data = wavfile.read('temp.wav')[1]
    step = int(timespan)  # slice steps must be integers
    # Floor division matches what "/" did on integer arrays under Python 2.
    return channel_freqs(data.T[0])[:sample_size * step:step].astype(int) // suppress_noise
# Reference spectra: row i is the spectrum of the single note classes[i].
notes_start = clean_freq(np.arange(n_classes).reshape([n_classes, 1]))
if poly:
    # Self-test: synthesize n_samples random chords of `poly` notes and check
    # that the `poly` largest coefficients of a non-negative least-squares
    # fit against the reference spectra recover the notes of each chord.
    answers = random_samples(n_samples)
    g = clean_freq(answers)
    k = 0  # number of chords that were not recovered exactly
    for t in range(n_samples):
        result = lsq_linear(notes_start.T, g[t], (0, np.inf))
        notes = result.x.argsort()[-poly:]
        if set(notes) != set(answers[t]):
            k += 1
            print('{} precision - {} recall + {}'.format(
                t, set(notes) - set(answers[t]), set(answers[t]) - set(notes)))
    # Percentage of failed chords; the former `k*2` was only a percentage
    # for n_samples == 50.
    print('{:g} % error'.format(100.0 * k / n_samples))
def _emit_note(midi, pitch, start, dur, vol):
    # Append one note of `dur` frames starting at frame `start`. The volume
    # is clamped to 100 before the int conversion so an infinite value
    # cannot raise OverflowError.
    midi.seq_notes([Note(pitch, dur=dur / 4., volume=int(min(100, vol)))], time=start)


def test_output(x, g):
    """Write the recognised notes to ``output.mid`` and play it with timidity.

    For every note class the frames are scanned in order. A frame counts as
    sounding when the squared coefficient exceeds
    ``minimal_volume * g[t] / g.mean()``. Each run of consecutive sounding
    frames becomes one MIDI note whose duration is the run length in frames.

    Args:
        x: array with one row per frame and one column per entry of
            ``classes``.
        g: per-frame level, indexed by frame number.
    """
    midi = Midi(1, tempo=out_tempo)
    mean_level = g.mean()
    for i in range(n_classes):
        track = x.T[i]
        dur = 0
        vol = 0
        for t, v in enumerate(track):
            min_volume = minimal_volume * g[t] / mean_level
            if v * v > min_volume:
                # NOTE(review): the accumulation below is kept exactly as it
                # was; it does not look like a plain running mean - confirm
                # the intended formula.
                if dur:
                    vol = (vol / dur + v * v / min_volume) * (dur + 1)
                else:
                    vol = v * v / min_volume
                dur += 1
            elif dur:
                # The run covered frames t - dur .. t - 1, so the note starts
                # at t - dur (it used to be placed at t, where it ended).
                _emit_note(midi, classes[i], t - dur, dur, vol)
                dur = 0
                vol = 0
        if dur:
            # A note still sounding at the last frame used to be dropped.
            _emit_note(midi, classes[i], len(track) - dur, dur, vol)
    midi.write("output.mid")
    os.system("timidity output.mid")
# Leftover experiment kept from the original: f[fi].argsort()[-3:]
# Transcribe the file named on the command line (or the default file).
if not sys.argv[1:]:
    sys.argv.append('giovanni_allevi-pensieri_nascosti.mp3')
samples = read_mp3(sys.argv[1])[1]
# Use the first channel of multi-channel audio. One-dimensional (mono) data,
# which the ffmpeg/avconv conversion requests with "-ac 1", is used as is;
# .T[0] on it would yield a single sample.
if samples.ndim > 1:
    samples = samples.T[0]
# Floor division matches what "/" did on integer arrays under Python 2.
g = channel_freqs(samples).astype(int) // suppress_noise
x = np.zeros([len(g), n_classes])
for i, b in enumerate(g):
    print('{:.1%}'.format(float(i) / len(g)))
    # Non-negative least squares: express this frame's spectrum as a
    # combination of the single-note reference spectra.
    result = lsq_linear(notes_start.T, b, (0, np.inf))
    # Report every fit that did not converge (status <= 0), not only
    # status 0.
    if not result.success:
        print(result)
    x[i] = result.x
pylab.imshow(x.T, cmap='spectral')
#pylab.show()
test_output(x, g.mean(axis=1))