#%% import section
# Imports used throughout the rest of the script.
import os
from datetime import datetime

import torch
import torchaudio
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice
from ctransformers import AutoModelForCausalLM
from tqdm import tqdm
from pydub import AudioSegment
from pydub.effects import normalize

# Tortoise preset: one of super_fast, fast, standard, high_quality
preset = "fast"
# This will download all the models used by Tortoise from the HF hub.
# tts = TextToSpeech()
# If you want to use DeepSpeed, pass use_deepspeed=True; it is nearly 2x faster than the default.
tts = TextToSpeech(use_deepspeed=False, kv_cache=True, device='cuda')
# Set gpu_layers to the number of layers to offload to the GPU. Set it to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained("/workspace/data/Mistral-7B-Instruct-v0.1-GGUF", model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf", model_type="mistral", gpu_layers=50)
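# Note (assumption): Mistral-7B has 32 transformer layers, so gpu_layers=50
# effectively offloads the whole model; llama.cpp-based backends clamp the
# value to the model's actual layer count.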
def checkVersion():
    print(torch.__version__)
    # Check if CUDA is available
    cuda_available = torch.cuda.is_available()
    # Print the result
    if cuda_available:
        print("CUDA is available in your PyTorch installation.")
        print("CUDA version:", torch.version.cuda)
    else:
        print("CUDA is not available in your PyTorch installation.")
#%% run topics
def get_day_of_week():
    # Get the current date and time (datetime here is the class imported at the top)
    current_datetime = datetime.now()
    # Use the strftime method to format the date and extract the day of the week
    day_of_week = current_datetime.strftime("%A")
    return day_of_week

# Call the function and print the result
day = get_day_of_week()
print("Today is", day)
#Create a list of topics
generalTopics = ["God", "chat G.P.T. religion", "quantum physics", "A.I. will make humans useless", "discoveries in the human brain", "close the program"]
listOFTopics = ["They talk about whether God exists or not; for instance, if God created the universe and God is omniscient, omnipotent and omnibenevolent, why is a simulation needed? Make it funny",
                "They talk about chat G.P.T. inventing a new religion that all humans are going to follow blindly",
                "They explain quantum physics for an advanced audience, using complex terms",
                "They talk about how A.I. will make humans useless; for instance, if A.I. can do everything, why do we need humans?",
                "They talk about recent discoveries in the human brain, make it funny",
                "They close the program; the radio program is called 'Stockastic Piraites', make it funny"]
print("number of topics", len(listOFTopics))
preTopic = "Generate a radio conversation between two commentators, Anna and Phillip. "
postTopic = ". Don't bring more people into the conversation, only two voices. Only refer to them by their names, and avoid adding times or other elements."
#Generate the conversations and accumulate them in mistral_generation
mistral_generation = ""
changeTopic = ""
day = get_day_of_week()
for i in tqdm(range(len(listOFTopics))):
    print(i, "\n--------------------------------------")
    if i == 0:
        changeTopic = "Start with greetings to all the audience, knowing that today is " + day + ". "
    else:
        changeTopic = "No greetings, don't say hey. They were talking about " + generalTopics[i-1] + " and now they change to " + generalTopics[i] + ". "
    prompt = preTopic + changeTopic + listOFTopics[i] + postTopic
    print(prompt + "\n")
    generate = llm(prompt, stream=False, max_new_tokens=512)
    print(generate, "\n")
    mistral_generation += generate
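# Calling llm(prompt, ...) returns the completion as a plain string;
# max_new_tokens=512 caps each topic's dialogue at roughly 350-400 words.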
#Fix some text errors
replacements = {
    "Anna:": "\nAnna:",
    "Phillip:": "\nPhillip:",
    "(laughing)": "hahahaha",
    "(laugh)": "hahaha",
    "(laughs)": "hahaha",
    "(smiling)": "hahaa",
    "(smile)": "hahaha",
    "(smiles)": "hahaha",
    "(chuckles)": "haha",
    "\n\n": "\n",
}
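# Tortoise reads its input verbatim, so stage directions like "(laughs)" would
# be spoken as words; replacing them with written-out laughter makes the TTS
# produce a laugh-like sound instead. The speaker-name entries add the line
# breaks that the speaker-splitting step further below relies on.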
#Fix expressions
final_text = mistral_generation
for old, new in replacements.items():
    final_text = final_text.replace(old, new)
final_text = final_text.replace("\n\n", "\n")
# Split the text into lines and inspect the first one
lines = final_text.splitlines()
# Check if the first line starts with "Anna:" or "Phillip:"
if lines and (lines[0].startswith("Anna:") or lines[0].startswith("Phillip:")):
    # The first line is already a dialogue line; keep it
    print("First line OK:", lines[0])
elif lines:
    # Erase the first line (a stray preamble or blank line)
    lines.pop(0)
# Rebuild the text (without the erased line, if any)
final_text = '\n'.join(lines)
print(final_text)
#%% run audio generation
def generateVoice(voice, text, preset, fileName):
    # Pick one of the voices bundled with Tortoise,
    # load it and send the text through the model.
    voice_samples, conditioning_latents = load_voice(voice)
    gen = tts.tts_with_preset(text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset=preset)
    # Tortoise generates audio at a 24 kHz sample rate
    torchaudio.save(fileName, gen.squeeze(0).cpu(), 24000)
    print("Saved to " + fileName)
    #IPython.display.Audio(fileName)
# Build a unique, time-based ID for naming files
def getCurrentTime():
    # Get the current date and time
    current_datetime = datetime.now()
    # Extract the date, hour, minute, and second components
    current_date = current_datetime.strftime("%d%m%y")
    current_hour = current_datetime.hour
    current_minute = current_datetime.minute
    current_second = current_datetime.second
    # Calculate the current second of the day
    current_second_of_day = current_hour * 3600 + current_minute * 60 + current_second
    # Zero-pad so that lexicographic file sorting matches generation order
    file_id = f"{current_second_of_day:05d}_{current_date}"
    return file_id
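# e.g. getCurrentTime() at 14:35:12 on 14 Jan 2024 returns "52512_140124"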
#generate the audio samples
def audioGeneration(voice, text, preset):
    print("----------------------------")
    print("speaker:", voice, "\n", text)
    # Write to the same absolute folder that is cleaned and read below
    path = "/workspace/data/presenters/"
    time = getCurrentTime()
    fileName = path + time + "_" + voice + '.wav'
    generateVoice(voice, text, preset, fileName)
# Specify the path to the 'presenters' folder
folder_path = '/workspace/data/presenters'
# Check if the folder exists before attempting to remove its contents
if os.path.exists(folder_path):
    # Get a list of all files in the folder
    files = os.listdir(folder_path)
    print(f"Removing {len(files)} files from {folder_path}...")
    # Iterate through the files and remove each one
    for file in files:
        file_path = os.path.join(folder_path, file)
        try:
            if os.path.isfile(file_path):
                os.remove(file_path)
                print(f"Successfully deleted {file_path}")
        except Exception as e:
            print(f"Error deleting {file_path}: {e}")
else:
    # If the folder doesn't exist, create it
    os.makedirs(folder_path)
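# Emptying the folder before generation means the concatenation step below
# only ever sees the clips produced by this run.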
#Split the text into dialogue lines and synthesize each one with the matching voice
lines = final_text.split('\n')
# Track the current speaker
current_speaker = None
# Iterate through the lines and generate audio per speaker
for line in lines:
    if line.startswith("Anna:"):
        current_speaker = "emma"  # this is the voice name in the dataset
        text = line[len("Anna:"):].strip()
        audioGeneration(current_speaker, text, preset)
    elif line.startswith("Phillip:"):
        current_speaker = "tom"  # this is the voice name in the dataset
        text = line[len("Phillip:"):].strip()
        audioGeneration(current_speaker, text, preset)
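# "emma" and "tom" are voice folders bundled with Tortoise; load_voice()
# resolves them by directory name, so any other bundled voice works here too.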
folder_path = "/workspace/data/presenters/"
# Get a list of all files in the folder
file_names = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
# Print the list of file names
# print("File names in the folder:")
# for file_name in file_names:
# print(file_name)
#----------------------------------------------------------------------
print("Concatenating the audio files...")
sourceFolder = "../workspace/data/rendered_audio/"
# Start with 4 seconds of silence so the opening music plays before the voices
concatenated_audio = AudioSegment.silent(duration=4000)
# Iterate through each audio file, normalize it and concatenate it
for file in file_names:
    audio_segment = AudioSegment.from_file(folder_path + file, format="wav")
    # Normalize the audio to increase the volume
    audio_segment = normalize(audio_segment)
    concatenated_audio += audio_segment
# Export the concatenated audio to a new file
concatenated_audio.export(sourceFolder + "concatenated_output_1.wav", format="wav")
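# Note: pydub's "+" / "+=" on AudioSegment objects concatenates in time, it
# does not mix; layering tracks on top of each other is done with overlay()
# in the final step below.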
#----------------------------------------------------------------------
print("Generating background music...")
pathBackground = "../workspace/data/audio_background/"
# Load the background tracks
track0 = AudioSegment.from_file(pathBackground + "opening_news.mp3", format="mp3")
track1 = AudioSegment.from_file(pathBackground + "Pop.mp3", format="mp3")
track2 = AudioSegment.from_file(pathBackground + "japanese_pop.mp3", format="mp3")
track3 = AudioSegment.from_file(pathBackground + "cumbia_3.mp3", format="mp3")
# Concatenate the tracks
combined_track = track0 + track1 + track2 + track3 + track0
# Duck the music after the intro: full volume for 2 seconds, a 2-second ramp
# down, then hold the reduced level for the rest of the track
ramp_duration = 2000  # 2 seconds in milliseconds
start_time = 2000  # 2 seconds in milliseconds
# Extract the part of the audio before the volume reduction
audio_before_ramp = combined_track[:start_time]
# Extract the part during the reduction and ramp it smoothly down to -13 dB
audio_ramp = combined_track[start_time:start_time + ramp_duration].fade(to_gain=-13, start=0, duration=ramp_duration)
# Extract the part of the audio after the volume reduction
audio_after_ramp = combined_track[start_time + ramp_duration:]
# Hold the gain 13 dB down so the music sits under the voices
audio_half_volume = audio_after_ramp - 13
# Combine the audio segments
final_audio = audio_before_ramp + audio_ramp + audio_half_volume
# Export the modified audio to a new file
final_audio.export(pathBackground + "new_background.mp3", format="mp3")
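# For reference: -6 dB is roughly half the amplitude and -10 dB roughly half
# the perceived loudness, so -13 dB keeps the bed clearly below the speech.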
#----------------------------------------------------------------------
print("Mixing background music with conversation...")
# Mix the background music with the conversation
background_music = AudioSegment.from_file(pathBackground + "new_background.mp3", format="mp3")
# overlay() layers the music under the voices and truncates the result to the
# length of concatenated_audio; add more overlay calls for additional tracks
mixed_audio = concatenated_audio.overlay(background_music)
# Export the mixed audio to a new file
time = getCurrentTime()
fileName = sourceFolder + time + "_mixed_output.mp3"
mixed_audio.export(fileName, format="mp3")
print(fileName)