-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.pyw
237 lines (211 loc) · 12 KB
/
main.pyw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import base64
import io
import os
import pickle
import tkinter as tk
import webbrowser
from tkinter import filedialog, messagebox, simpledialog
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pygame
from PIL import Image, ImageTk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
pygame.mixer.init() # Initialize pygame's mixer at import time; play_audio() and on_closing() use pygame.mixer.music
canvas = None # Plot canvas currently embedded in the window; stays None until open_file() creates one
# Base64-encoded image data for the application icon; decoded by get_icon_from_base64() when the window is built
ICON_BASE64 = """iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAAS1BMVEUAAADvjJS9vb1SSkLm5ube3t7Ozs7vKTrFxcXvvcWcnJzelJTvWmtaUlLeITHv7+/W1ta1tbWtra2lpaX3lJT3WmOtSkr3QkqcOkLJAJcIAAAAAXRSTlMAQObYZgAAAFFJREFUGNOljDcOgEAQA232EpfI4f8vRULao6CD6WZkGd+RAQTZq1spmcxUN6bsKwnFJzNO57M/Nm+tiKpzvlYX5hZSiPH+QaProGhYXgV/uAAQeQHIXWPCWwAAAABJRU5ErkJggg=="""
def get_icon_from_base64(base64_string):
    """Turn base64-encoded image data into an image usable by Tkinter.

    Args:
        base64_string: Base64 text encoding an image that PIL can open.

    Returns:
        An ``ImageTk.PhotoImage`` built from the decoded image.
    """
    # Decode to raw bytes and expose them as a file-like object for PIL.
    image_buffer = io.BytesIO(base64.b64decode(base64_string))
    decoded_image = Image.open(image_buffer)
    return ImageTk.PhotoImage(decoded_image)
def extract_features(audio_path):
    """Summarize an audio file as a single 1-D feature vector.

    The file is loaded at its native sampling rate (``sr=None``). Four librosa
    features are computed (13 MFCCs, chroma, Mel spectrogram, spectral
    contrast); each is transposed and averaged along axis 0, then the four
    averages are concatenated in that order.

    Args:
        audio_path: Path of the audio file to analyze.

    Returns:
        A NumPy array holding the concatenated per-feature means.
    """
    signal, sample_rate = librosa.load(audio_path, sr=None)

    def averaged(feature_matrix):
        # Collapse the feature matrix to one mean value per feature row.
        return np.mean(feature_matrix.T, axis=0)

    feature_blocks = [
        averaged(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)),
        averaged(librosa.feature.chroma_stft(y=signal, sr=sample_rate)),
        averaged(librosa.feature.melspectrogram(y=signal, sr=sample_rate)),
        averaged(librosa.feature.spectral_contrast(y=signal, sr=sample_rate)),
    ]
    return np.hstack(feature_blocks)
def load_or_initialize_model():
    """Load the persisted classifier state, or bootstrap it from bundled clips.

    Sets the module globals ``model`` (SVC), ``le`` (LabelEncoder), ``X_train``
    and ``y_train``. When ``tone_model.pkl`` and ``label_encoder.pkl`` both
    exist they are loaded, together with ``X_train.npy``/``y_train.npy`` if
    present. If no training data ends up loaded, the predefined clips
    (``neutral.wav``, ``happy.wav``, ``sad.wav``, ``mad.wav``) found in the
    working directory are used to train a fresh model, and the result is saved.

    Training is skipped when fewer than two distinct tones are available,
    because ``SVC.fit`` rejects single-class data; in that case the training
    arrays stay empty so ``predict_tone`` reports the model as untrained
    instead of the application crashing at startup.
    """
    global model, le, X_train, y_train

    # Defaults used whenever nothing (or only part of the state) is on disk.
    model = SVC(kernel="linear")
    le = LabelEncoder()
    X_train = np.array([])
    y_train = np.array([])

    if os.path.exists("tone_model.pkl") and os.path.exists("label_encoder.pkl"):
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # These files are written by this application; never point this at
        # pickles obtained from an untrusted source.
        with open("tone_model.pkl", "rb") as f:
            model = pickle.load(f)
        with open("label_encoder.pkl", "rb") as f:
            le = pickle.load(f)
        if os.path.exists("X_train.npy") and os.path.exists("y_train.npy"):
            X_train = np.load("X_train.npy")
            y_train = np.load("y_train.npy")

    if X_train.size != 0:
        return

    messagebox.showinfo("Info", "No initial training data found. Training with predefined data.")
    predefined_clips = (
        ("neutral.wav", "neutral"),
        ("happy.wav", "happy"),
        ("sad.wav", "sad"),
        ("mad.wav", "mad"),
    )
    data = []
    labels = []
    for audio, label in predefined_clips:
        if os.path.exists(audio):
            data.append(extract_features(audio))
            labels.append(label)
        else:
            messagebox.showinfo("Error", f"File {audio} not found!")

    # An SVM classifier needs at least two classes; with fewer, leave the
    # model untrained rather than letting SVC.fit raise ValueError.
    if len(set(labels)) < 2:
        return

    X_train = np.array(data)
    y_train = np.array(labels)
    le.fit(y_train)
    y_encoded = le.transform(y_train)
    model.fit(X_train, y_encoded)

    # Persist the model, label encoder and training data for the next run.
    with open("tone_model.pkl", "wb") as f:
        pickle.dump(model, f)
    with open("label_encoder.pkl", "wb") as f:
        pickle.dump(le, f)
    np.save("X_train.npy", X_train)
    np.save("y_train.npy", y_train)
def predict_tone(audio_path):
    """Predict the tone label for an audio file.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        The predicted tone label, or a human-readable message when the file
        does not exist or the model has not been trained yet.
    """
    # Imported locally so this function carries its own dependency.
    from sklearn.exceptions import NotFittedError

    not_trained_message = "Model not trained yet. Please provide some initial data."

    if not os.path.exists(audio_path):
        return "File not found!"
    if X_train.size == 0:
        return not_trained_message

    # The model expects a 2-D array with one row per sample.
    features = extract_features(audio_path).reshape(1, -1)
    try:
        prediction = model.predict(features)
    except NotFittedError:
        # Training data can exist while the classifier is still unfitted
        # (e.g. only one tone has been collected so far).
        return not_trained_message
    return le.inverse_transform(prediction)[0]
def update_model(audio_path, correct_tone):
    """Add one labeled sample to the training set and retrain when possible.

    The sample's features are appended to the global ``X_train`` and
    ``correct_tone`` to the global ``y_train``; both arrays are saved to
    ``X_train.npy``/``y_train.npy``. The label encoder and model are refit and
    saved only when the training set contains at least two distinct tones,
    because ``SVC.fit`` raises ``ValueError`` for single-class data.

    Args:
        audio_path: Path of the audio file the label belongs to.
        correct_tone: Tone label supplied for that file.
    """
    global X_train, y_train

    features = extract_features(audio_path)
    X_train = (
        np.vstack([X_train, features]) if X_train.size else features.reshape(1, -1)
    )
    # Start a fresh label array when none exists yet, so the string label is
    # not appended to the float-typed empty placeholder.
    y_train = (
        np.append(y_train, correct_tone) if y_train.size else np.array([correct_tone])
    )

    # Persist the collected samples first so they are kept even when the
    # classifier cannot be trained yet.
    np.save("X_train.npy", X_train)
    np.save("y_train.npy", y_train)

    if np.unique(y_train).size < 2:
        # Not enough classes to train an SVM; keep collecting samples.
        return

    le.fit(y_train)
    y_encoded = le.transform(y_train)
    model.fit(X_train, y_encoded)

    with open("tone_model.pkl", "wb") as f:
        pickle.dump(model, f)
    with open("label_encoder.pkl", "wb") as f:
        pickle.dump(le, f)
def plot_waveform(audio_path):
    """Create a figure showing the waveform of an audio file.

    The figure is built as a standalone ``matplotlib.figure.Figure`` instead
    of through ``pyplot``, so it is not registered in pyplot's global figure
    list and can be garbage-collected once the embedding canvas is destroyed.

    Args:
        audio_path: Path of the audio file to plot.

    Returns:
        The Matplotlib figure containing a single axes titled "Waveform".
    """
    # Imported locally so this function carries its own dependency.
    from matplotlib.figure import Figure

    y, sr = librosa.load(audio_path)
    fig = Figure(figsize=(8, 2))
    ax = fig.subplots()
    librosa.display.waveshow(y, sr=sr, ax=ax)
    ax.set_title("Waveform")
    return fig
def play_audio(audio_path):
    """Load an audio file into pygame's music player and start playback.

    Args:
        audio_path: Path of the audio file to play.
    """
    music_player = pygame.mixer.music
    music_player.load(audio_path)
    music_player.play()
def open_file():
    """Let the user pick a WAV file, predict its tone and collect feedback.

    After a file is chosen, the predicted tone is shown in ``result_label``,
    the waveform plot replaces any previous plot in the window, and the audio
    is played. The user is then asked to confirm the prediction; if they
    reject it and provide a non-blank tone, the model is updated with it.
    """
    global canvas

    file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav")])
    if not file_path:
        return  # Dialog was cancelled.

    tone = predict_tone(file_path)
    result_label.config(text=f"Predicted Tone: {tone}")

    # Replace the previously embedded plot, if any, with the new waveform.
    if canvas:
        canvas.get_tk_widget().destroy()
    fig = plot_waveform(file_path)
    canvas = FigureCanvasTkAgg(fig, master=window)
    canvas.draw()
    canvas.get_tk_widget().pack(pady=20)

    play_audio(file_path)

    response = messagebox.askquestion(
        "Confirm Prediction", f"Is the prediction '{tone}' correct?"
    )
    if response != "no":
        messagebox.showinfo(
            "Confirmation",
            "The prediction was confirmed as correct. No updates were made to the model.",
        )
        return

    correct_tone = simpledialog.askstring(
        "Correct Tone", "Please enter the correct tone:"
    )
    # Strip surrounding whitespace so blank input is rejected and a padded
    # label such as "happy " does not become a separate class.
    correct_tone = correct_tone.strip() if correct_tone else ""
    if not correct_tone:
        messagebox.showwarning(
            "Input Error",
            "No correct tone provided. The model was not updated.",
        )
        return

    update_model(file_path, correct_tone)
    messagebox.showinfo(
        "Model Updated", "The model has been updated with the correct tone."
    )
def open_help():
    """Show the "Help" dialog with a short description and the credits."""
    help_text = (
        "This program allows you to use and train an AI to figure out the tone of vocals\n\n"
        "Code: Landon & Emma\n"
    )
    messagebox.showinfo("Help", help_text)
def open_repository():
    """Open the project's GitHub repository page in a new browser window."""
    repository_url = "https://github.com/LandonAndEmma/Voice-Tone-Detector"
    webbrowser.open_new(repository_url)
def on_closing():
    """Handle the window-close event: stop audio, drop the plot, exit the UI."""
    # Silence playback that is still running.
    music_player = pygame.mixer.music
    if music_player.get_busy():
        music_player.stop()

    # Remove the embedded plot widget if one was created (canvas is only read
    # here, so no global declaration is required).
    if canvas:
        canvas.get_tk_widget().destroy()

    # Tear down the window, then ask the Tkinter main loop to stop.
    window.destroy()
    window.quit()
# ---- Main window ----
window = tk.Tk()
window.title("Voice Tone Detector")
icon = get_icon_from_base64(ICON_BASE64)  # Decoded application icon
window.iconphoto(False, icon)
# Fixed height of 301 px; width may vary between 275 and 800 px.
window.geometry("800x301")
window.minsize(275, 301)
window.maxsize(800, 301)
window.resizable(True, False)
window.attributes('-fullscreen', False)
# Route the window-manager close request through on_closing.
window.protocol("WM_DELETE_WINDOW", on_closing)

# ---- Menu bar: "File" and "Help" cascades ----
menubar = tk.Menu(window)
file_menu = tk.Menu(menubar, tearoff=0)
for _entry_label, _entry_action in (("Open", open_file),):
    file_menu.add_command(label=_entry_label, command=_entry_action)
menubar.add_cascade(label="File", menu=file_menu)
help_menu = tk.Menu(menubar, tearoff=0)
for _entry_label, _entry_action in (("Help", open_help), ("Repository", open_repository)):
    help_menu.add_command(label=_entry_label, command=_entry_action)
menubar.add_cascade(label="Help", menu=help_menu)
window.configure(menu=menubar)

# ---- Prediction label ----
result_label = tk.Label(window, text="Predicted Tone: None")
result_label.pack(pady=20)

# ---- Startup: prepare the model, then hand control to Tkinter ----
load_or_initialize_model()
window.mainloop()