generated from cheshire-cat-ai/plugin-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
local_whisper_cat.py
148 lines (111 loc) · 4.28 KB
/
local_whisper_cat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import requests
import io
from pydantic import BaseModel, Field
from cat.log import log
from cat.mad_hatter.decorators import hook, plugin
import base64
class Settings(BaseModel):
    """Configuration schema for the Local Whisper Cat plugin.

    Each field carries the title and description exposed through the
    generated settings schema, plus the default used when nothing is stored.
    """

    # Base URL of the transcription web service.
    url_model: str = Field(
        title="url model",
        description="The name of the container for OpenAI's transcription model.",
        default="http://openai-whisper-asr-webservice:9000",
    )

    # Key of the incoming message under which the audio payload is looked up.
    audio_key: str = Field(
        title="Audio Key",
        description="The name of the json field for the WebSocket message sent from the user in order to recognize your audio. Defaults to 'audio_key'.",
        default="audio_key",
    )

    # Language code forwarded to the transcription service.
    language: str = Field(
        title="Language",
        description="The language of the audio file. Defaults to 'en'.",
        default="en",
    )
@plugin
def settings_schema():
    """Return the schema generated from the ``Settings`` model.

    The function is registered through the ``plugin`` decorator.
    """
    schema = Settings.schema()
    return schema
def decode_base64_audio(audio: str) -> bytes:
    """Return the raw bytes represented by the base64 text ``audio``."""
    raw_audio = base64.b64decode(audio)
    return raw_audio
def transcript(audio_file, url, language, timeout=(10, None)):
    """Upload an audio file to the ASR web service and return the transcription.

    Args:
        audio_file: Indexable ``(file_name, content, mime_type)`` triple.
            ``content`` must support ``len()``; it is uploaded as the
            multipart field ``audio_file``.
        url: Base URL of the service. The request is posted to ``<url>/asr``;
            a trailing slash on ``url`` is tolerated.
        language: Value sent as the ``language`` query parameter.
        timeout: Forwarded to ``requests.post``. The default only bounds the
            connection phase (10 s) and leaves the read phase unbounded so
            long transcriptions are not cut off; pass a number or a
            ``(connect, read)`` tuple to be stricter.

    Returns:
        The ``text`` field of the JSON response on success. On any failure
        (oversized audio, network error, non-2xx status, malformed response)
        a human-readable sentence is returned instead; request and response
        errors are never raised to the caller.
    """
    connection_error = (
        "I'm sorry, I couldn't connect to the server. Please try again later"
    )
    transcription_error = (
        "I'm sorry, I couldn't transcribe the audio file. Please try again later"
    )

    name_file = audio_file[0]
    audio_body = audio_file[1]
    type_file = audio_file[2]

    # Refuse oversized uploads before touching the network.
    if len(audio_body) > 25 * 1000000:
        return "Over 25MB? The audio shouldn't be this large."

    headers = {
        "accept": "application/json",
    }
    params = {
        "encode": "true",
        "task": "transcribe",
        "language": language,
        "word_timestamps": "false",
        "output": "json",
    }
    files = {
        "audio_file": (name_file, audio_body, type_file),
    }

    try:
        response = requests.post(
            # rstrip avoids "//asr" when the configured URL ends with "/".
            url.rstrip("/") + "/asr",
            params=params,
            headers=headers,
            files=files,
            timeout=timeout,
        )
    except requests.exceptions.ConnectionError as e:
        print(connection_error)
        print(e)
        return connection_error
    except requests.exceptions.RequestException as e:
        # Invalid URL, read timeout, broken stream, ...: none of these should
        # crash the message pipeline.
        print(f"Request to the transcription service failed: {e}")
        return transcription_error

    if not response.ok:
        print("Status Code ", response.status_code)
        return transcription_error

    try:
        json_response = response.json()
    except ValueError as e:
        # The JSON decode errors raised by requests derive from ValueError.
        print(f"The transcription service returned a non-JSON body: {e}")
        return transcription_error

    if not isinstance(json_response, dict) or "text" not in json_response:
        print("The transcription response does not contain a 'text' field")
        return transcription_error

    return json_response["text"]
def _load_whisper_settings(cat) -> dict:
    """Return the plugin settings, filling any missing key from ``Settings`` defaults."""
    defaults = Settings()
    stored = cat.mad_hatter.get_plugin().load_settings() or {}
    if not stored:
        log.error("No configuration found for Local Whisper Cat")
    return {
        "url_model": stored.get("url_model", defaults.url_model),
        "audio_key": stored.get("audio_key", defaults.audio_key),
        "language": stored.get("language", defaults.language),
    }


@hook(priority=99)
def before_cat_reads_message(message: dict, cat) -> dict:
    """Transcribe base64 audio carried by an incoming message into ``message["text"]``.

    The audio payload is looked up under the configured ``audio_key``. It is
    only processed when the message also carries ``encodedBase64`` equal to
    ``True``; the decoded bytes are then sent to ``transcript`` together with
    the optional ``audio_name`` and ``audio_type`` entries of the message.

    Args:
        message: Incoming message; it is modified in place when a
            transcription is produced.
        cat: Object giving access to the plugin settings through
            ``cat.mad_hatter.get_plugin().load_settings()``.

    Returns:
        The same ``message`` object. It is returned untouched when it has no
        audio entry, the entry is empty, the payload is not flagged as base64,
        or the payload cannot be decoded.
    """
    settings = _load_whisper_settings(cat)
    audio_key = settings["audio_key"]

    if audio_key not in message:
        # Messages without audio are the normal case, so this is not an error.
        log.debug("This message does not contain an audio file.")
        return message

    received_blob = message[audio_key]
    if not received_blob:
        log.error("The audio file path is empty.")
        return message

    # Equality (not identity) is kept on purpose so a flag sent as 1 still counts.
    is_base64 = message.get("encodedBase64") == True  # noqa: E712
    if not is_base64:
        # Without the flag there is nothing that can be safely decoded; bail
        # out instead of continuing with an undefined payload.
        log.error("File not in base64 or wrongly flagged")
        return message

    try:
        audio_body = decode_base64_audio(received_blob)
    except (ValueError, TypeError) as e:
        # binascii.Error, raised on malformed base64, is a ValueError subclass.
        log.error(f"The audio payload is not valid base64: {e}")
        return message

    audio_file = (
        message.get("audio_name", "audio"),
        audio_body,
        message.get("audio_type", "application/octet-stream"),
    )

    # Making the transcription
    message["text"] = transcript(
        audio_file,
        url=settings["url_model"],
        language=settings["language"],
    )
    return message