NN_predict_pipeline.py
# %%
import io
from urllib.request import urlopen

import numpy as np
import pandas as pd
import librosa
import soundfile as sf
import tensorflow as tf
from sklearn.preprocessing import normalize
#------------------------------------------------
mel_coefs = 50
max_frequency = 10000
#--------------------------------------------------
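# NOTE: mel_coefs and max_frequency are not wired into the melspectrogram
# call below, which falls back to librosa defaults (n_mels=128, fmax=sr/2).
# Presumably they were meant to be passed as n_mels=mel_coefs and
# fmax=max_frequency; since that wiring is an assumption, the call below is
# left on the defaults the trained model was built against.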
def predict_instrument(url):
    '''Run the full pipeline on an audio URL: ETL preprocessing, loading the
    trained model, and using the model to get a prediction.'''
    # ------------------------------- ETL preprocessing part ------------------------------------
    # Fetch the first second of audio (samples 0-44100 at 44.1 kHz) from the URL.
    audio, samplerate = sf.read(io.BytesIO(urlopen(url).read()), start=0, stop=44100)
    # Collapse multi-channel audio to mono so resampling gets a 1-D signal.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    data_22k = librosa.resample(audio, orig_sr=samplerate, target_sr=22050)
    mel_spec = librosa.feature.melspectrogram(y=data_22k)
    # Normalize spectrogram values column-wise to a maximum of 1.
    mel_norm = normalize(mel_spec.T, axis=0, norm='max')
    # Reshape mel_norm into the 4-D tensor the network expects:
    # (batch, time_frames, mel_bands, channels)
    channels = 1  # single audio channel
    batch_size = 1
    spectrogram_shape = (batch_size,) + mel_norm.shape + (channels,)
    inst_ETL_4d_output = mel_norm.reshape(spectrogram_shape)
    print(inst_ETL_4d_output.shape)
    # -------------------------------- Load trained inst model --------------------------
    inst_model = tf.keras.models.load_model('trained_intruments_model.h5')
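    # NOTE: loading the model on every call is simple but slow; in a serving
    # context (such as the app.py mentioned below) it would more typically be
    # loaded once at import time and reused across requests.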
    # -------------------------------- Prediction --------------------------
    # inst_model (shared with app.py) returns one probability per class.
    inst_result = inst_model.predict(inst_ETL_4d_output)
    # Reverse the to_categorical() encoding: take the highest-probability index.
    inst_scalar = np.argmax(inst_result)
    # Extract the instrument names from the CSV into a list.
    inst_Name_df = pd.read_csv('CV_inst_Name.csv')
    inst_name_list = inst_Name_df['0'].tolist()
    # Reverse the LabelEncoder() mapping to get the prediction label.
    NN_inst_pred = inst_name_list[inst_scalar]
    return NN_inst_pred
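
# %%
# Local-file variant of predict_instrument() for offline testing. A minimal
# sketch, not part of the deployed pipeline: the helper name
# predict_instrument_from_file is hypothetical, and it assumes the same
# 1-second/44.1 kHz window and that 'trained_intruments_model.h5' and
# 'CV_inst_Name.csv' sit next to the script, as above.
def predict_instrument_from_file(file_path):
    '''Run the same ETL + prediction steps on a local audio file.'''
    audio, samplerate = sf.read(file_path, start=0, stop=44100)
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    data_22k = librosa.resample(audio, orig_sr=samplerate, target_sr=22050)
    mel_norm = normalize(librosa.feature.melspectrogram(y=data_22k).T,
                         axis=0, norm='max')
    batch = mel_norm.reshape((1,) + mel_norm.shape + (1,))
    inst_model = tf.keras.models.load_model('trained_intruments_model.h5')
    inst_name_list = pd.read_csv('CV_inst_Name.csv')['0'].tolist()
    return inst_name_list[int(np.argmax(inst_model.predict(batch)))]
# Example (path is a placeholder):
# predict_instrument_from_file('local_test_clip.wav')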
# %%
url = "https://raw.githubusercontent.com/susiexia/AI_Music/susie/BTb-ord-A%231-ff-N-T30d.wav"
#url = "https://res.cloudinary.com/dmqj5ypfp/video/upload/v1588530259/Uploaded_audio/a0frl4m8km6rfi48ur6m.wav"
predict_instrument(url)
# %%