-
Notifications
You must be signed in to change notification settings - Fork 0
/
testImage.py
30 lines (24 loc) · 880 Bytes
/
testImage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import io
import os
import tempfile

import cv2
import librosa
import matplotlib.pyplot as plt
import numpy as np
def upload_audio_get_mfcc_image():
    """Render the MFCC of an uploaded audio file as a PNG response.

    Reads the upload stored under the ``'file'`` key of ``request.files``,
    converts it to an MFCC matrix with ``mp3tomfcc`` (fixed at 400 columns),
    draws that matrix with ``imshow`` and returns the PNG bytes through
    ``send_file`` with mimetype ``image/png``.

    NOTE(review): ``request`` and ``send_file`` are not imported in the
    visible part of this file; presumably they are Flask's -- confirm and
    add ``from flask import request, send_file`` where the app is defined.
    """
    print("Audio received, converting to MFCC")
    # Read the audio file from the request
    audio_file = request.files['file']

    # The upload only exists inside the request, so it has to be written to
    # disk before it can be loaded by path. The client-supplied filename is
    # untrusted: only its extension is reused (as a format hint for the
    # decoder), never its directory or base name.
    suffix = os.path.splitext(audio_file.filename or "")[1]
    fd, audio_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        audio_file.save(audio_path)
        # Convert to MFCC
        mfcc = mp3tomfcc(audio_path, max_pad=400)
    finally:
        # Always remove the temporary copy, even if decoding fails.
        os.remove(audio_path)

    # Create an image from the MFCC
    fig = plt.figure(figsize=(10, 4))
    try:
        plt.imshow(mfcc, aspect='auto', origin='lower')
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each request would leak one figure.
        plt.close(fig)

    # Rewind so the response is read from the start of the PNG data.
    buf.seek(0)
    return send_file(buf, mimetype='image/png')
def mp3tomfcc(file_path, max_pad):
    """Load an audio file and return its MFCC matrix with ``max_pad`` columns.

    The audio at ``file_path`` is loaded with ``librosa.load`` and passed to
    ``librosa.feature.mfcc`` with ``n_mfcc=60``. The result is then fitted
    along its second axis to exactly ``max_pad`` columns: shorter results
    are padded at the end with ``np.pad(..., mode='constant')``, longer
    results are truncated.

    Args:
        file_path: Path of the audio file to load.
        max_pad: Number of columns the returned matrix must have.

    Returns:
        The MFCC array whose second dimension equals ``max_pad``.

    Raises:
        ValueError: If ``max_pad`` is negative.
    """
    if max_pad < 0:
        raise ValueError(f"max_pad must be non-negative, got {max_pad}")

    audio, sample_rate = librosa.load(file_path)
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=60)

    n_frames = mfcc.shape[1]
    if n_frames >= max_pad:
        # np.pad rejects negative widths, so a clip that yields more than
        # max_pad frames used to raise; cut it down to the target width.
        return mfcc[:, :max_pad]

    pad_width = max_pad - n_frames
    return np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')