-
Notifications
You must be signed in to change notification settings - Fork 1
/
app.py
184 lines (160 loc) · 6.39 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
"""Python script for a voice assistant using Streamlit and Ollama
This script creates a user interface using Streamlit to interact with a large language model\
(LLM) from Ollama for voice-based and text-based communication.
Features:
- Audio recording using Streamlit's `audio_recorder` component.
- Speech recognition for Ukrainian (UA) and English (EN) using custom functions.
- Automatic language detection based on transcribed text.
- Text input for user prompts.
- Streamlit chat interface for displaying conversation history.
- Interaction with Ollama's LLM for generating responses.
- Text-to-speech functionality (not implemented in this code).
"""
import warnings
import asyncio
import ollama
import streamlit as st
from audio_recorder_streamlit import audio_recorder
from src.ukrainian_stt import ua_transcribe
from src.english_stt import en_transcribe
from src.transcribe_speak import transcribe_and_speak
from src.utils import convert_audio_to_wav, check_language
from src.identify_lang import identify_language
from config import SYS_MSG
# Suppress warnings
# NOTE(review): with no category or module given, this filter hides every
# warning raised in the process, including deprecation notices from the
# imported libraries - confirm that this is intended.
warnings.filterwarnings("ignore")
# File paths
# Raw recording, written byte-for-byte from the recorder widget's output.
RECORDED_WAV_FILE = "./data/wav/microphone_stereo.wav"
# Output of convert_audio_to_wav(); this is the file passed to the
# transcribers and to identify_language().
CONV_WAV_FILE = "./data/wav/converted_mono.wav"
# NOTE(review): not referenced anywhere in the visible part of this file.
WAV_FILE = "./data/wav/chunk.wav"
# Initial conversation history
# A single system message; used as the starting value of
# st.session_state.messages when a session has no messages yet.
HISTORY = [{"role": "system", "content": SYS_MSG}]
# Streamlit page configuration
# Runs ahead of every other st.* call in this file.
st.set_page_config(
page_title="Voice assistant UA-EN",
page_icon=":trident:",
layout="wide",
initial_sidebar_state="auto",
menu_items=None,
)
def ollama_prompt(model="llama3.1", messages=None):
    """
    Start a streaming chat request against an Ollama model.

    Args:
        model (str, optional): Name of the Ollama model to query.
            Defaults to "llama3.1".
        messages (list, optional): Conversation so far, as a list of
            {"role": ..., "content": ...} dictionaries. Defaults to None.

    Returns:
        The result of `ollama.chat(..., stream=True)`. The caller iterates
        over it with a plain `for` loop to receive the reply chunk by chunk.
    """
    return ollama.chat(stream=True, messages=messages, model=model)
def _speak_fragment(text):
    """Pick a voice for *text* and hand it to `transcribe_and_speak`."""
    # Any character from the Unicode Cyrillic block (U+0400-U+04FF) selects
    # the "ua" voice; everything else is spoken as "en".
    lang = "ua" if any("\u0400" <= char <= "\u04FF" for char in text) else "en"
    # asyncio.run() drives the coroutine to completion before returning, so
    # the next chunk is only processed once this call has finished.
    asyncio.run(transcribe_and_speak(text=text, lang=lang))


def stream_parser(stream):
    """
    Relay the LLM's streamed reply to the chat UI and speak it piece by piece.

    When iteration starts, an empty assistant entry is appended to
    `st.session_state.messages`. The text of every chunk is then echoed to
    stdout, added to that entry and yielded to the caller. Text is buffered
    until it ends with sentence-like punctuation and is then passed to
    `transcribe_and_speak`. Whatever is still buffered when the stream ends
    is spoken as well, so a reply that does not finish with punctuation is
    not cut short.

    Args:
        stream: Iterable of chunks, each exposing its text at
            chunk["message"]["content"].

    Yields:
        str: The text of each chunk, in arrival order.
    """
    # Speech is triggered as soon as the buffered text ends with one of these.
    sentence_endings = ('."', "\n\n", "**:", ".", "!", "?", '?"', '!"', ":")
    pending = ""

    st.session_state.messages.append({"role": "assistant", "content": ""})
    print("Assistant: ", end="")
    for chunk in stream:
        content = chunk["message"]["content"]
        print(content, end="", flush=True)
        pending += content
        st.session_state.messages[-1]["content"] += content
        if pending.endswith(sentence_endings):
            _speak_fragment(pending)
            pending = ""
        yield content

    # Flush the tail: without this, trailing text that never reached a
    # sentence ending would be shown on screen but never spoken.
    if pending.strip():
        _speak_fragment(pending)
def stop_running():
    """Post a "Stop!" chat message as the user in the `my_slot1` placeholder.

    This only writes the message; nothing in this function interrupts a
    response that is being generated.
    """
    stop_notice = my_slot1.chat_message("user")
    stop_notice.markdown("Stop!")
# Setup order of elements: the placeholders are created top to bottom here
# and filled in later in the script.
my_slot0 = st.empty()  # most buttons
my_slot1 = st.empty()  # chat_message("user")
my_slot2 = st.empty()  # chat_message(message["role"]) & chat_message("assistant")

# Streamlit custom microphone
col1, col2 = my_slot0.columns([1, 8.5], vertical_alignment="bottom")
with col1:
    audio_bytes = audio_recorder(
        text="", energy_threshold=0.01, icon_size="5x"
    )  # if energy_threshold negative - never stops
    # 44 bytes is the size of a canonical WAV header, so this presumably
    # skips recordings that contain no audio data - TODO confirm.
    if audio_bytes is not None and len(audio_bytes) != 44:
        st.audio(audio_bytes, format="audio/wav")
        # The `with` block closes (and flushes) the file on exit, so the
        # recording is fully on disk before it is converted below.
        with open(file=RECORDED_WAV_FILE, mode="wb") as f:
            f.write(audio_bytes)
        convert_audio_to_wav(audio_file=RECORDED_WAV_FILE, output_file=CONV_WAV_FILE)
# Text that marks a failed transcription.
# NOTE(review): assumed to be what the transcriber returns when it could not
# understand the audio - confirm against src.english_stt / src.ukrainian_stt.
_NOT_RECOGNIZED = "Didn't recognize that."


def _voice_prompt(lang_tag, transcribe):
    """Transcribe the converted recording into a "<lang_tag>:<text>" prompt.

    Args:
        lang_tag (str): Language prefix to prepend, "ua" or "en".
        transcribe (callable): Function that takes the WAV path and returns
            the transcript.

    Returns:
        str | None: The tagged prompt, or None when the transcript is empty
        or equals the failure text.
    """
    text = transcribe(CONV_WAV_FILE)
    # Check the raw transcript: once the language tag is prepended the
    # failure text can never match, and it would be sent to the LLM as if
    # the user had said it.
    if not text or text == _NOT_RECOGNIZED:
        print(_NOT_RECOGNIZED)
        return None
    prompt = f"{lang_tag}:{text}"
    print(prompt)
    return prompt


# Choose language buttons
with col2:
    PRMPT = None
    button0, button1, button2, button3 = st.columns(4)
    with button0:
        if st.button("Stop", use_container_width=True, type="primary"):
            stop_running()
    with button1:
        if st.button("Говорю (UA)", use_container_width=True):
            PRMPT = _voice_prompt("ua", ua_transcribe)
    with button2:
        if st.button("Talking (EN)", use_container_width=True):
            PRMPT = _voice_prompt("en", en_transcribe)
    with button3:
        if st.button("Automatic", use_container_width=True):
            DEF_LANG = identify_language(CONV_WAV_FILE)
            # NOTE(review): every candidate is a one-element list, so this
            # only matches if identify_language() returns such a list -
            # confirm against src.identify_lang.
            # These four labels are routed to the Ukrainian transcriber;
            # anything else goes to the English one.
            if DEF_LANG in [
                ["uk: Ukrainian"],
                ["pl: Polish"],
                ["ru: Russian"],
                ["be: Belarusian"],
            ]:
                PRMPT = _voice_prompt("ua", ua_transcribe)
            else:
                PRMPT = _voice_prompt("en", en_transcribe)
# Typed prompt; None means nothing was submitted on this run, in which case
# check_language() is not called.
typed_text = st.chat_input(placeholder="Краще напишу/I'll write instead")
user_prompt = None if typed_text is None else check_language(user_prompt=typed_text)

# A session with no stored messages starts from the initial history.
if "messages" not in st.session_state:
    st.session_state["messages"] = HISTORY

# Render every stored message into the my_slot2 placeholder, in order.
for message in st.session_state["messages"]:
    bubble = my_slot2.chat_message(message["role"])
    bubble.markdown(message["content"])
# A typed prompt takes precedence over a transcribed one; empty or missing
# prompts are not sent to the model.
prompt_text = user_prompt or PRMPT
if prompt_text:
    # Display user prompt in chat message widget
    with my_slot1.chat_message("user"):
        print()
        print("User:", end="")
        print(prompt_text)
        st.markdown(prompt_text)
    # Add the user's prompt to the history that is sent to the model.
    st.session_state.messages.append({"role": "user", "content": prompt_text})
    # Request the model's reply as a stream.
    LLM_STREAM = ollama_prompt(
        messages=st.session_state.messages,
    )
    with my_slot2.chat_message("assistant"):
        try:
            st.write(stream_parser(LLM_STREAM))
        except Exception as err:
            # An except clause must name an exception class. Failures from
            # the model or the speech pipeline are reported in the chat
            # instead of crashing the page with an unrelated TypeError.
            # NOTE(review): Streamlit's own stop/rerun signals are expected
            # to derive from BaseException and so pass through untouched -
            # confirm for the installed Streamlit version.
            print(f"Assistant error: {err}")
            st.error(f"Could not get a response from the model: {err}")