diff --git a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/demo.py b/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/demo.py
deleted file mode 100644
index e879bf8..0000000
--- a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/demo.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import torch
-from neuralnet.model import neuralnet # Call model instance from neuralnet
-
-from utils import get_features
-
-# Load the trained model
-model = neuralnet(input_size=1, output_shape=6)
-checkpoint = torch.load("model/sentiment-model-19-0.07.ckpt", map_location=torch.device('cpu'))
-
-# Evaluate model
-model.eval()
-model.load_state_dict(checkpoint['state_dict'])
-
-labels = ["angry", "disgust", "fear", "happy", "neutral", "sad"]
-
-# Perform inference
-audio_path = ['sample/' + label + '.wav' for label in labels] # just a sampling loop instead of taking one sample at a time
-
-for audio in audio_path:
-    features = get_features(audio)
-    # print(features.shape, features.dtype)
-    input_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).unsqueeze(1) # Add extra batch dimension and channel dimension
-    # print(input_tensor.shape, input_tensor.dtype)
-
-    with torch.inference_mode():
-        output = model(input_tensor)
-
-    # Convert output to probabilities and get predicted class
-    probabilities = torch.softmax(output, dim=1)
-    predicted_class = torch.argmax(probabilities, dim=1).item()
-
-    # Calculate confidence
-    confidence = probabilities[0, predicted_class].item()
-
-    # Print predicted class
-    print(f"Path: {audio}\nPredicted class: {labels[predicted_class]} --- Confidence: {confidence:.3f}\n")
\ No newline at end of file
diff --git a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/engine.py b/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/engine.py
new file mode 100644
index 0000000..7c533d4
--- /dev/null
+++ b/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/engine.py
@@ -0,0 +1,64 @@
+import numpy as np
+import joblib
+import librosa
+import torch
+
+from sklearn.preprocessing import StandardScaler
+from neuralnet.model import HybridModel
+from feature import getMELspectrogram, splitIntoChunks
+
+
+EMOTIONS = {
+    1: 'neutral',
+    2: 'calm',
+    3: 'happy',
+    4: 'sad',
+    5: 'angry',
+    6: 'fear',
+    7: 'disgust',
+    0: 'surprise'
+}
+
+scaler = StandardScaler()
+model = HybridModel(len(EMOTIONS))
+model.load_state_dict(torch.load("model/speech_sentiment.pt", map_location=torch.device('cpu')))
+SAMPLE_RATE = 48000
+scaler = joblib.load('model/scaler.pkl')
+
+def process_audio(audio_file_path):
+    global scaler
+    chunked_spec = []
+
+    # Load audio file
+    audio, sample_rate = librosa.load(audio_file_path, sr=SAMPLE_RATE, duration=3)
+    signal = np.zeros((int(SAMPLE_RATE * 3),))
+    signal[:len(audio)] = audio
+    mel_spectrogram = getMELspectrogram(signal, SAMPLE_RATE)
+    chunks = splitIntoChunks(mel_spectrogram, win_size=128, stride=64)
+
+    chunked_spec.append(chunks)
+    chunks = np.stack(chunked_spec, axis=0)
+    chunks = np.expand_dims(chunks, axis=2)
+
+    # Reshape the chunks
+    chunks = np.reshape(chunks, newshape=(1, -1))
+    chunks_scaled = scaler.transform(chunks)
+    chunks_scaled = np.reshape(chunks_scaled, newshape=(1, 7, 1, 128, 128))
+
+    # Convert to tensor for model input
+    chunks_tensor = torch.tensor(chunks_scaled).float()
+
+    # Model inference
+    with torch.inference_mode():
+        model.eval()
+        _, output_softmax, _ = model(chunks_tensor)
+        predictions = torch.argmax(output_softmax, dim=1)
+        print(predictions)
+        predicted_emotion = EMOTIONS[predictions.item()]
+
+    print(f"Predicted Emotion: {predicted_emotion}")
+    return predicted_emotion
+
+file_path = "fear.wav"
+process_audio(file_path)
+
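The new engine.py above feeds HybridModel a tensor of shape (1, 7, 1, 128, 128): one batch item, seven time chunks, one channel, and 128 x 128 mel-spectrogram patches. Since the scaler loaded from model/scaler.pkl presumably expects flattened vectors, the chunks are reshaped to (1, -1) before scaler.transform and then reshaped back. A minimal sketch of that round trip, with a freshly fitted StandardScaler standing in for the pickled one:

# Illustration only: the flatten -> scale -> reshape round trip used in engine.py.
import numpy as np
from sklearn.preprocessing import StandardScaler

chunks = np.random.rand(1, 7, 1, 128, 128).astype(np.float32)  # stand-in for the stacked mel chunks
flat = chunks.reshape(1, -1)                                    # (1, 114688) feature vector per clip
scaler = StandardScaler().fit(flat)                             # engine.py loads the fitted scaler from model/scaler.pkl instead
scaled = scaler.transform(flat).reshape(1, 7, 1, 128, 128)      # back to the model's expected input shape
assert scaled.shape == (1, 7, 1, 128, 128)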
diff --git a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/chunk.py b/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/feature.py
similarity index 100%
rename from src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/chunk.py
rename to src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/feature.py
diff --git a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/main.py b/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/main.py
deleted file mode 100644
index ebfb4ea..0000000
--- a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/main.py
+++ /dev/null
@@ -1,121 +0,0 @@
-import librosa
-import numpy as np
-import torch
-from IPython.display import Audio, display
-import os
-import joblib
-from architecture import HybridModel # Import your model architecture
-from chunk_and_spectogram import getMELspectrogram, splitIntoChunks # Import the functions
-
-EMOTIONS = {
-    1: 'neutral',
-    2: 'calm',
-    3: 'happy',
-    4: 'sad',
-    5: 'angry',
-    6: 'fear',
-    7: 'disgust',
-    0: 'surprise'
-}
-
-# Load your trained model
-LOAD_PATH = os.path.join(os.getcwd(), 'models')
-model = HybridModel(len(EMOTIONS))
-
-# UNCOMMENT THE CODE LINE 1 TO 2 AND COMMENT THE CODE BELOW LINE 3 TO 4 IF YOU PLAN ON RUNNING THE MODEL ON GPU
-
-# 1
-# Load model weights and move to the appropriate device
-# model.load_state_dict(torch.load(os.path.join(LOAD_PATH, '/content/speech_sentiment_asr.pt')))
-# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-# model.to(device) # Move the model to the GPU or keep it on CPU
-# print('Model is loaded from {}'.format(os.path.join(LOAD_PATH, 'speech_sentiment_asr.pt')))
-# 2
-
-# 3
-# Load model weights and move to the appropriate device (CPU version)
-model.load_state_dict(torch.load(os.path.join(LOAD_PATH, '/content/speech_sentiment_asr.pt'), map_location=torch.device('cpu')))
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model.to(device) # Move the model to the GPU or keep it on CPU
-print('Model is loaded from {}'.format(os.path.join(LOAD_PATH, 'speech_sentiment_asr.pt')))
-# 4
-
-SAMPLE_RATE = 48000
-DURATION = 3
-NUM_MEL_BINS = 128
-
-# Load your fitted scaler
-scaler = joblib.load('/content/scaler.pkl')
-
-def process_audio(audio_file_path):
-    """
-    Process the audio file, convert to MEL spectrogram, split into chunks, scale and make predictions.
-    """
-    # Load audio file
-    audio, sample_rate = librosa.load(audio_file_path, sr=SAMPLE_RATE)
-
-    # Ensure the audio length is the desired target length
-    target_length = SAMPLE_RATE * DURATION
-    if len(audio) > target_length:
-        audio = audio[:target_length]
-    else:
-        audio = np.pad(audio, (0, target_length - len(audio)), 'constant')
-
-    # Compute MEL spectrogram
-    mel_spectrogram = getMELspectrogram(audio, SAMPLE_RATE)
-    print(f"Mel Spectrogram Shape: {mel_spectrogram.shape}")
-
-    # Split into chunks
-    chunks = splitIntoChunks(mel_spectrogram, win_size=128, stride=64)
-    print(f"Chunks Shape Before Scaling: {chunks.shape}")
-
-    # Pad or truncate to 7 chunks
-    num_chunks = chunks.shape[0]
-    print(f"Number of Chunks: {num_chunks}")
-    if num_chunks < 7:
-        padding = np.zeros((7 - num_chunks, 128, 128))
-        chunks = np.concatenate((chunks, padding), axis=0)
-    elif num_chunks > 7:
-        chunks = chunks[:7]
-
-    # Prepare chunks for model input
-    chunks = chunks[np.newaxis, :] # Add batch dimension
-    chunks = np.expand_dims(chunks, axis=1) # Add channel dimension (for CNN)
-    chunks_reshaped = chunks.reshape(1, 7, 1, 128, 128)
-    print(f"Chunks Shape After Reshaping: {chunks_reshaped.shape}")
-
-    # Scale the chunks
-    chunks_scaled = scaler.transform(chunks_reshaped.reshape(1, -1))
-    chunks_scaled = chunks_scaled.reshape(1, 7, 1, 128, 128)
-    print(f"Chunks Shape After Scaling: {chunks_scaled.shape}")
-
-    # Convert to tensor for model input
-    chunks_tensor = torch.tensor(chunks_scaled, device=device).float()
-
-    # Make predictions with the model
-    with torch.no_grad():
-        model.eval()
-        _, output_softmax, _ = model(chunks_tensor)
-        predictions = torch.argmax(output_softmax, dim=1)
-        predicted_emotion = EMOTIONS[predictions.item()]
-
-    # Display the audio
-    display(Audio(audio_file_path))
-
-    # Print the predicted emotion
-    print(f"Predicted Emotion: {predicted_emotion}")
-
-    return predicted_emotion
-
-# Take input audio file from user
-
-print("NOTE: IT YOU HAVE MP3 FILE, THEN PLEASE RUN MP3_TO_WAV.PY SCRIPT TO CONVERT MP3 TO WAV FIRST, THEN ONLY RUN TIS SCRIPT")
-
-print("\n")
-print("\n")
-print("\n")
-
-file_path = input("Enter the path to your .wav file: ")
-
-# Process the audio and predict emotion
-process_audio(file_path)
diff --git a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/utils.py b/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/utils.py
deleted file mode 100644
index ddd77a5..0000000
--- a/src/ASR-with-Speech-Sentiment-Analysis-Text-Summarizer/Speech_Sentiment_Analysis/utils.py
+++ /dev/null
@@ -1,42 +0,0 @@
-""" Function Script for preprocessing audio data and extract features """
-
-import librosa
-import numpy as np
-
-
-# Zero Crossing Rate
-# Reference: https://librosa.org/doc/latest/generated/librosa.feature.zero_crossing_rate.html#librosa.feature.zero_crossing_rate
-def zcr(data, frame_length=2048, hop_length=512):
-    zcr = librosa.feature.zero_crossing_rate(y=data, frame_length=frame_length, hop_length=hop_length)
-    return np.squeeze(zcr)
-
-# RMS Energy
-# Reference: https://librosa.org/doc/latest/generated/librosa.feature.rms.html#librosa.feature.rms
-def rmse(data, frame_length=2048, hop_length=512):
-    rmse = librosa.feature.rms(y=data, frame_length=frame_length, hop_length=hop_length)
-    return np.squeeze(rmse)

-# MFCC
-# Reference: https://librosa.org/doc/latest/generated/librosa.feature.mfcc.html
-def mfcc(data, sr, frame_length=2048, hop_length=512, flatten: bool = True):
-    mfcc_feature = librosa.feature.mfcc(y=data, sr=sr)
-    return np.squeeze(mfcc_feature.T) if not flatten else np.ravel(mfcc_feature.T)
-
-# Feature Extraction of ZCR, RMS, MFCC
-def extract_features(data, sr, frame_length=2048, hop_length=512):
-    result = np.array([])
-    result = np.hstack((result,
-                        zcr(data, frame_length, hop_length),
-                        rmse(data, frame_length, hop_length),
-                        mfcc(data, sr, frame_length, hop_length)
-                        ))
-    return result
-
-
-""" Original """
-def get_features(path):
-    data, sampling_rate = librosa.load(path, duration=2.5, offset=0.6)
-    # print('Data and sampling rate:', data.shape, sampling_rate)
-    result = extract_features(data, sampling_rate)
-    # print(result.shape)
-    return result
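The deleted demo.py/utils.py path classified flattened ZCR, RMS and MFCC vectors, whereas the new engine.py relies on the chunked mel spectrograms provided by feature.py (renamed from chunk.py; its contents are not shown in this diff). A minimal sketch of helpers with the same call signatures, assuming 128 mel bins and a hop length that yields seven 128-frame windows from a 3-second, 48 kHz clip; the real implementations in feature.py may differ:

# Hypothetical stand-ins for the feature.py helpers imported by engine.py.
import numpy as np
import librosa

def getMELspectrogram(audio, sample_rate):
    # 128 mel bins to match the 128 x 128 chunks used above (assumed parameters)
    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128,
                                         n_fft=1024, hop_length=256)
    return librosa.power_to_db(mel, ref=np.max)

def splitIntoChunks(mel_spec, win_size, stride):
    # Slide a win_size-frame window across time with the given stride
    frames = mel_spec.shape[1]
    starts = range(0, frames - win_size + 1, stride)
    return np.stack([mel_spec[:, s:s + win_size] for s in starts])

With these assumed parameters, a 3-second clip at 48 kHz gives roughly 563 frames, and a 128-frame window with stride 64 produces exactly the seven chunks that engine.py reshapes into (1, 7, 1, 128, 128).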