-
Notifications
You must be signed in to change notification settings - Fork 13
/
main.py
69 lines (56 loc) · 2.33 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import shutil
import openai as openai
import pysrt as pysrt
def transcribe_single_audio(api_key, audio_file, new_file_path):
audio_file = open(audio_file, "rb")
openai.api_key = api_key
transcript = openai.Audio.translate("whisper-1", audio_file, response_format="srt", language="en")
f = open(new_file_path, "a+", encoding="utf-8")
f.write(transcript)
print(new_file_path + " finish")
f.close()
def transcribe_all_audio(api_key, dir_path):
for root, dirs, files in os.walk(dir_path):
i = 0
for file in files:
file_path = os.path.join(root, file)
file_name = os.path.basename(file_path)
if file_name.endswith(".mp3"):
i = i + 1
if file_name.endswith(".mp3") and i > 303:
# print(file_name)
# exit()
transcribe_single_audio(api_key, file_path, os.path.join(root, file_name + ".srt"))
def extract_all_srt_text(folder_path, new_file_path):
# 获取文件夹中的所有文件
files = os.listdir(folder_path)
# 遍历所有文件
for file in files:
# 构建文件的完整路径
file_path = os.path.join(folder_path, file)
file_name = os.path.basename(file_path)
# 检查文件是否为srt文件
if file.endswith(".srt"):
text = extract_srt_text(file_path)
f = open(new_file_path + "\\" + file_name.replace(".srt","") + ".txt", "a+", encoding="utf-8")
f.write(text)
f.flush()
f.close()
def extract_srt_text(file_path):
# 打开srt文件
subs = pysrt.open(file_path)
# 提取文本内容
text = ""
for sub in subs:
text += sub.text_without_tags + "\n"
return text.strip()
if __name__ == '__main__':
proxy = "http://127.0.0.1:7890"
os.environ["http_proxy"] = proxy
os.environ["https_proxy"] = proxy
api_key = "sk-xxxxxxx"
dir_path = "C:\\Users\\guagu\\Desktop\\english_pod_audio\\"
# transcribe_single_audio(api_key, "C:\\Users\\guagu\\Desktop\\english_pod_audio\\englishpod_0001pb.mp3", "C:\\Users\\guagu\\Desktop\\english_pod_audio\\englishpod_0001pb.srt")
# transcribe_all_audio(api_key, dir_path)
extract_all_srt_text("C:\\Users\\guagu\\Desktop\\english_pod_audio\\srt", "C:\\Users\\guagu\\Desktop\\english_pod_audio\\txt")