"""
This script uses the OpenAI API and ElevenLab's REST API
"""
import sys

import speech_recognition as sr  # type: ignore
from openai import OpenAI
from elevenlabs import stream
from elevenlabs.client import ElevenLabs, Voice, VoiceSettings
from rich.console import Console
from rich.text import Text

from config import (
    OPENAI_API_KEY,
    OPENAI_ORG_ID,
    OPENAI_PROJECT_ID,
    OPENAI_MODEL,
    OPENAI_SYSTEM_PROMPT,
    ELEVENLABS_API_KEY,
    ELEVENLABS_VOICE_ID,
)
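# config.py is expected to define the constants imported above. A minimal
# sketch with placeholder values (the names are taken from the import;
# supply your own keys and IDs):
#
#   OPENAI_API_KEY = "sk-..."
#   OPENAI_ORG_ID = "org-..."
#   OPENAI_PROJECT_ID = "proj_..."
#   OPENAI_MODEL = "gpt-4o"            # any chat-completions model
#   OPENAI_SYSTEM_PROMPT = "You are a helpful, concise assistant."
#   ELEVENLABS_API_KEY = "..."
#   ELEVENLABS_VOICE_ID = "..."        # ID of the voice to speak with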
console = Console()

oai_client = OpenAI(
    api_key=OPENAI_API_KEY,
    organization=OPENAI_ORG_ID,
    project=OPENAI_PROJECT_ID,
)
e_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)


def generate_and_play_response(user_input, conversation_history):
    """
    Generates a response using the OpenAI API and
    plays it using the ElevenLabs TTS API.

    Args:
        user_input (str): The user's input.
        conversation_history (list): The conversation history as a list of
            dictionaries, where each dictionary represents a message with
            'role' (either 'user' or 'assistant') and 'content' (the
            message content).

    Returns:
        None
    """
    conversation_history.append({"role": "user", "content": user_input})
    completion = oai_client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=conversation_history,
        temperature=0.5,
        top_p=0.95,
        max_tokens=128,
        frequency_penalty=0.5,
        presence_penalty=0.5,
        stream=True,
    )
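    # Accumulate the streamed deltas into the full reply text; chunks
    # whose delta has no content (e.g. the final chunk) are skipped.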
    response_text = ""
    # Iterate the stream directly: wrapping it in list() would buffer the
    # entire response before any chunk is processed.
    for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            response_text += chunk.choices[0].delta.content
    conversation_history.append(
        {"role": "assistant", "content": response_text.strip()}
    )

    assistant_text = Text("🤖 ", style="green")
    assistant_text.append(response_text.strip())
    console.print(assistant_text)
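    # e_client.generate() also accepts a generator of strings, so the reply
    # is wrapped in a one-shot generator to use the streaming input path.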
    def text_stream():
        yield response_text

    audio_stream = e_client.generate(
        text=text_stream(),
        voice=Voice(
            voice_id=ELEVENLABS_VOICE_ID,
            settings=VoiceSettings(
                stability=0.5,
                similarity_boost=0.0,
                style=0.0,
                use_speaker_boost=True,
            ),
        ),
        model="eleven_turbo_v2",
        # optimize_streaming_latency is an option on generate() itself, not
        # a VoiceSettings field (assumed per the v1 elevenlabs Python SDK).
        optimize_streaming_latency=4,
        stream=True,
    )
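    # The stream() playback helper shells out to mpv, which must be
    # installed and on PATH.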
    stream(audio_stream)


def recognize_speech(timeout=20):
    """
    Recognizes speech from the microphone input.

    Args:
        timeout (int): The maximum number of seconds to wait for speech
            input.

    Returns:
        str or None: The recognized speech as a string,
        or None if no speech was detected or an error occurred.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        console.print("Listening...\n")
        try:
            audio = recognizer.listen(source, timeout=timeout)
        except sr.WaitTimeoutError:
            console.print("Timeout reached, please try again.")
            return None
        try:
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            console.print("Could not understand audio")
            return None
        except sr.RequestError as e:
            console.print(f"Could not request results; {e}")
            return None


def main(use_voice=False):
    """
    Main function for running the QuickChat application.

    Args:
        use_voice (bool): Flag indicating whether to use voice input.

    Returns:
        None
    """
    conversation_history = [
        {"role": "system", "content": OPENAI_SYSTEM_PROMPT}
    ]
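    # The history starts with the system prompt and grows with each
    # user/assistant turn, so the model keeps conversational context.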
    while True:
        if use_voice:
            user_input = recognize_speech()
            if user_input is None:
                continue
        else:
            user_input = input("\nHow can I help you? ")
        generate_and_play_response(user_input, conversation_history)


if __name__ == "__main__":
    try:
        use_voice_input = "--voice" in sys.argv
        main(use_voice_input)
    except KeyboardInterrupt:
        console.print("\n\nGoodbye for now ...\n")