diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f0dc1ee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.conda
+.vscode
+__pycache__
+data/model_data
+data/HISTORY.json
+flagged
+src/__pycache__
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b882d6d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,48 @@
+# UA-EN Voice Assistant
+## No API keys | local | llama3.1 (12k-token prompt menu: ~20 pages in a single request)
+![GitHub](https://img.shields.io/github/license/Katashynskyi/voice_assistant_UA_EN)
+![GitHub last commit](https://img.shields.io/github/last-commit/Katashynskyi/voice_assistant_UA_EN)
+![Gif](data/media/gif.gif)
+
+
+[![GUI version](data/media/GUI_V.png)](https://youtu.be/iw9P4Y7KXI4)
+[![Console version](data/media/CONSOLE_V.png)](https://youtu.be/c-8Z4qzOcII)
+
+## The Idea
+This project is a proof of concept for a minimum viable product (MVP) inspired by the capabilities of ChatGPT's omni model. It offers one significant advantage: local deployment without restrictions. Users can apply it to a range of purposes, including:
+- Translation across languages
+- Learning enhancement through writing, reading, and listening practice
+- Customization for tailored use cases
+
+## Features & Tech stack
+
+- **Language classification**: detects whether speech is UA or EN for the automatic mode, using speechbrain's "lang-id-voxlingua107-ecapa" (supports 100+ languages).
+- **Google legacy recognizer**: uses a generic key that works out of the box; fast and reliable.
+- **Wav2Vec2-Bert**: currently the best Ukrainian speech-to-text converter.
+- **Edge-TTS**: the most natural-sounding voices available for free.
+- **Ollama-python**: library for downloading and running the most popular LLMs.
+- **Streamlit**: for the GUI.
+- **Dialogue saved in JSON**: HISTORY.json (main.py only; app.py keeps only short-term context-window memory).
+- **config.py**: the system prompt for the best user experience (modify it for your own purposes).
+
+
+## Getting Started
+### Tested on
+- WSL (Ubuntu 22.04.3)
+- GeForce GTX 1050 Ti mobile (4 GB)
+- 32 GB RAM
+### Prerequisites
+- Python 3.9+
+- Virtual environment (Conda, Python 3.9+)
+- CUDA (optional)
+### Installation
+
+- Clone the repository
+- Create a conda environment (Python 3.9)
+- Install PortAudio: sudo apt install portaudio19-dev
+- Install the required packages: pip install -r requirements.txt
+
+## Usage
+
+After installing the required libraries, run main.py for the console experience or app.py for the GUI.
+
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..db822f1
--- /dev/null
+++ b/app.py
@@ -0,0 +1,184 @@
+"""Python script for a voice assistant using Streamlit and Ollama
+
+This script creates a user interface using Streamlit to interact with a large language model\
+ (LLM) from Ollama for voice-based and text-based communication.
+
+Features:
+    - Audio recording using Streamlit's `audio_recorder` component.
+    - Speech recognition for Ukrainian (UA) and English (EN) using custom functions.
+    - Automatic language detection based on transcribed text.
+    - Text input for user prompts.
+    - Streamlit chat interface for displaying conversation history.
+    - Interaction with Ollama's LLM for generating responses.
+    - Text-to-speech playback of responses via Edge TTS (see src/transcribe_speak.py).
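+
+Assumed launch workflow: an Ollama server must be running locally with the llama3.1 model
+pulled (`ollama pull llama3.1`); the UI itself is then started with `streamlit run app.py`.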
+""" + +import warnings +import asyncio +import ollama +import streamlit as st +from audio_recorder_streamlit import audio_recorder +from src.ukrainian_stt import ua_transcribe +from src.english_stt import en_transcribe +from src.transcribe_speak import transcribe_and_speak +from src.utils import convert_audio_to_wav, check_language +from src.identify_lang import identify_language +from config import SYS_MSG + +# Suppress warnings +warnings.filterwarnings("ignore") + +# File paths +RECORDED_WAV_FILE = "./data/wav/microphone_stereo.wav" +CONV_WAV_FILE = "./data/wav/converted_mono.wav" +WAV_FILE = "./data/wav/chunk.wav" + +# Initial conversation history +HISTORY = [{"role": "system", "content": SYS_MSG}] + +# Streamlit page configuration +st.set_page_config( + page_title="Voice assistant UA-EN", + page_icon=":trident:", + layout="wide", + initial_sidebar_state="auto", + menu_items=None, +) + + +def ollama_prompt(model="llama3.1", messages=None): + """ + Sends a prompt to the Ollama LLM and returns a stream of responses. + + Args: + model (str, optional): The Ollama model to use. Defaults to "llama3.1". + messages (list, optional): A list of dictionaries representing the conversation history. + Defaults to None. + + Returns: + stream: An asynchronous stream of dictionaries containing the LLM's responses. + """ + stream = ollama.chat(model=model, messages=messages, stream=True) + return stream + + +def stream_parser(stream): + """ + Parses the stream of responses from the LLM and displays them in the Streamlit chat interface. + + Args: + stream: An asynchronous stream of dictionaries containing the LLM's responses. + + Yields: + str: Each chunk of the LLM's response. + """ + sentence_chunks, response_text = "", "" + st.session_state.messages.append({"role": "assistant", "content": response_text}) + print("Assistant: ", end="") + for chunk in stream: + print(chunk["message"]["content"], end="", flush=True) + content = chunk["message"]["content"] + sentence_chunks += content + response_text += content + st.session_state.messages[-1]["content"] += content + if sentence_chunks.endswith( + ('."', "\n\n", "**:", ".", "!", "?", '?"', '!"', ":") + ): + if any("\u0400" <= char <= "\u04FF" for char in sentence_chunks): + lang = "ua" + else: + lang = "en" + asyncio.run(transcribe_and_speak(text=sentence_chunks, lang=lang)) + sentence_chunks = "" + yield chunk["message"]["content"] + + +def stop_running(): + """The Dummy.Currently does nothing.""" + with my_slot1.chat_message("user"): + st.markdown("Stop!") + +# Setup order of elements +my_slot0 = st.empty() # most buttons +my_slot1 = st.empty() # chat_message("user") +my_slot2 = st.empty() # chat_message(message["role"] & chat_message("assistant") + + +# Streamlit custom microphone +col1, col2 = my_slot0.columns([1, 8.5], vertical_alignment="bottom") +with col1: + audio_bytes = audio_recorder( + text="", energy_threshold=0.01, icon_size="5x" + ) # if energy_threshold negative - never stops +if audio_bytes is not None and len(audio_bytes) != 44: + st.audio(audio_bytes, format="audio/wav") + with open(file=RECORDED_WAV_FILE, mode="wb") as f: + f.write(audio_bytes) + f.close() + convert_audio_to_wav(audio_file=RECORDED_WAV_FILE, output_file=CONV_WAV_FILE) +# Choose language buttons +with col2: + PRMPT = None + button0, button1, button2, button3 = st.columns(4) + with button0: + if st.button("Stop", use_container_width=True, type="primary"): + stop_running() + with button1: + if st.button("Говорю (UA)", use_container_width=True): + PRMPT = "ua:" + 
ua_transcribe(CONV_WAV_FILE)
+            print(PRMPT)
+    with button2:
+        if st.button("Talking (EN)", use_container_width=True):
+            PRMPT = "en:" + en_transcribe(CONV_WAV_FILE)
+            print(PRMPT)
+    with button3:
+        if st.button("Automatic", use_container_width=True):
+            DEF_LANG = "???"
+            DEF_LANG = identify_language(CONV_WAV_FILE)
+            if DEF_LANG in [
+                ["uk: Ukrainian"],
+                ["pl: Polish"],
+                ["ru: Russian"],
+                ["be: Belarusian"],
+            ]:
+                PRMPT = "ua:" + ua_transcribe(CONV_WAV_FILE)
+                print(PRMPT)
+            else:
+                PRMPT = "en:" + en_transcribe(CONV_WAV_FILE)
+                if PRMPT == "Didn't recognize that.":
+                    print(PRMPT)
+                    PRMPT = None
+                else:
+                    print(PRMPT)
+user_prompt = st.chat_input(placeholder="Краще напишу/I'll write instead")
+if user_prompt is not None:
+    user_prompt = check_language(user_prompt=user_prompt)
+# Check for existing messages in session state
+if "messages" not in st.session_state:
+    st.session_state.messages = HISTORY
+
+# Display chat messages from session state
+for message in st.session_state.messages:
+    with my_slot2.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+if user_prompt is not None or PRMPT is not None:
+    # Display the user prompt in a chat message widget
+    with my_slot1.chat_message("user"):
+        print()
+        print("User:", end="")
+        print(user_prompt or PRMPT)
+        st.markdown(user_prompt or PRMPT)
+
+    # Add the user's prompt to session state
+    st.session_state.messages.append({"role": "user", "content": user_prompt or PRMPT})
+
+    # Retrieve the response from the model
+    LLM_STREAM = ollama_prompt(
+        messages=st.session_state.messages,
+    )
+    with my_slot2.chat_message("assistant"):
+        try:
+            st.write(stream_parser(LLM_STREAM))
+        except Exception:
+            # Streaming was interrupted; stop_running() is currently a placeholder
+            stop_running()
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..6ce38da
--- /dev/null
+++ b/config.py
@@ -0,0 +1,11 @@
+"""Instructions for what the LLM must do; rewrite them as much as you wish."""
+
+SYS_MSG = "You are a helpful AI voice assistant (your name is Bot). Generate the most useful \
+and factual response possible, carefully considering all previously generated text in your response \
+before adding new tokens to the response. Use all of the context of this conversation so your \
+response is relevant to the conversation. Make your responses clear and concise, avoiding any \
+verbosity. You'll mostly be asked questions in English or Ukrainian. It's mandatory to avoid mixing languages within a single \
+sentence. You can provide your response entirely in English or entirely in Ukrainian. \
+If you need to use words from both languages in the same sentence, consider transliterating \
+one of them (especially names!). 
If the prompt contains 'ua:', answer in Ukrainian; if it contains 'en:', answer in English \
+(in both cases, use transliteration if needed)."
diff --git a/data/chunk copy.wav b/data/chunk copy.wav
new file mode 100644
index 0000000..af56144
--- /dev/null
+++ b/data/chunk copy.wav
@@ -0,0 +1 @@
+This is some sample data
\ No newline at end of file
diff --git a/data/chunk.wav b/data/chunk.wav
new file mode 120000
index 0000000..9f87985
--- /dev/null
+++ b/data/chunk.wav
@@ -0,0 +1 @@
+/home/mr/repositories/VOICE_ASSISTANT/data/wav/chunk.wav
\ No newline at end of file
diff --git a/data/converted_mono.wav b/data/converted_mono.wav
new file mode 120000
index 0000000..7271a1f
--- /dev/null
+++ b/data/converted_mono.wav
@@ -0,0 +1 @@
+/home/mr/repositories/VOICE_ASSISTANT/data/wav/converted_mono.wav
\ No newline at end of file
diff --git a/data/media/CONSOLE_V.png b/data/media/CONSOLE_V.png
new file mode 100644
index 0000000..bd4c0e0
Binary files /dev/null and b/data/media/CONSOLE_V.png differ
diff --git a/data/media/GUI_V.png b/data/media/GUI_V.png
new file mode 100644
index 0000000..46c297f
Binary files /dev/null and b/data/media/GUI_V.png differ
diff --git a/data/media/gif.gif b/data/media/gif.gif
new file mode 100644
index 0000000..1123167
Binary files /dev/null and b/data/media/gif.gif differ
diff --git a/data/wav/EN_test.wav b/data/wav/EN_test.wav
new file mode 100644
index 0000000..96c8918
Binary files /dev/null and b/data/wav/EN_test.wav differ
diff --git a/data/wav/UA_test.wav b/data/wav/UA_test.wav
new file mode 100644
index 0000000..a420ba5
Binary files /dev/null and b/data/wav/UA_test.wav differ
diff --git a/data/wav/chunk.wav b/data/wav/chunk.wav
new file mode 100644
index 0000000..ca111cb
Binary files /dev/null and b/data/wav/chunk.wav differ
diff --git a/data/wav/converted_mono.wav b/data/wav/converted_mono.wav
new file mode 100644
index 0000000..4fd3ad8
Binary files /dev/null and b/data/wav/converted_mono.wav differ
diff --git a/data/wav/microphone_stereo.wav b/data/wav/microphone_stereo.wav
new file mode 100644
index 0000000..1bf67f1
Binary files /dev/null and b/data/wav/microphone_stereo.wav differ
diff --git a/data/wav/transcribed_piece.mp3 b/data/wav/transcribed_piece.mp3
new file mode 100644
index 0000000..685c30e
Binary files /dev/null and b/data/wav/transcribed_piece.mp3 differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..bdaa050
--- /dev/null
+++ b/main.py
@@ -0,0 +1,107 @@
+"""Main.py provides the entry point to a terminal-based voice agent with memory and speech abilities \
+in UA and EN voices. With this configuration it can be used for practising English or Ukrainian, \
+both writing and listening skills."""
+
+import json
+import sounddevice  # fixing ALSA errors
+import speech_recognition as sr
+from src.english_stt import en_transcribe
+from src.identify_lang import identify_language
+from src.ollama_tts import ollama_prompt
+from src.ukrainian_stt import ua_transcribe
+from config import SYS_MSG
+
+
+def main(memory=False):
+    """
+    The main entry point for the application.
+
+    Args:
+        memory (bool, optional): Whether to load and save conversation history. Defaults to False.
+
+    Returns:
+        None
+
+    Automatically detects your language, transcribes it, and pronounces & writes the answer in UA or EN.
+    Optionally saves the conversation history to a JSON file.
+    """
+    if memory is True:
+        try:
+            with open("data/HISTORY.json", "r", encoding="UTF-8") as f:
+                history = json.load(f)
+        except FileNotFoundError:
+            print(
+                "No previous history is available, creating a new one.",
+                "\t",
+                "Попередньої розмови не знайдено, створюю розмову.",
+            )
+            history = [{"role": "system", "content": SYS_MSG}]
+    else:
+        # Without persistent memory, start from the system prompt only
+        history = [{"role": "system", "content": SYS_MSG}]
+    try:
+        listening = True
+        while listening:
+            with sr.Microphone() as source:
+                recognizer = sr.Recognizer()
+                recognizer.pause_threshold = 0.8
+                recognizer.energy_threshold = 500
+                recognizer.adjust_for_ambient_noise(source)  # , duration=1)
+                # recognizer.dynamic_energy_threshold = 3000
+
+                def_lang = "???"
+                try:
+                    print("Listening...\t\t Слухаю...")
+                    audio = recognizer.listen(source, timeout=5.0)
+                    print("Working on it...\t Обробка...")
+                    # Save the audio data to a WAV file
+                    wav_data = audio.get_wav_data(convert_rate=16000)
+                    # Write the WAV data to a file
+                    wav_file = "./data/wav/chunk.wav"
+                    with open(wav_file, "wb") as file:
+                        file.write(wav_data)
+                except (sr.UnknownValueError, sr.WaitTimeoutError):
+                    print(
+                        "Could you repeat, please? That wasn't recognized.",
+                        "\t\t",
+                        "Повторіть будь ласка, не розчула",
+                    )
+                    continue  # nothing was captured, so listen again
+                def_lang = identify_language(wav_file)
+                if def_lang in [
+                    ["uk: Ukrainian"],
+                    ["pl: Polish"],
+                    ["ru: Russian"],
+                    ["be: Belarusian"],
+                ]:
+                    print("Запит Солов'їною, обробка...")
+                    prmpt = ua_transcribe(wav_file)
+                    print("Користувач:", prmpt)
+                    print("Дай подумати...")
+                    ollama_prompt(prompt="ua: " + prmpt, history=history)
+                else:
+                    print("Detected as English, working on it...")
+                    prmpt = en_transcribe(wav_file)
+                    if prmpt == "Didn't recognize that.":
+                        print("Didn't recognize that.\t\t Не зрозуміла.")
+                    else:
+                        print("User:", prmpt)
+                        print("Wait for the LLM to answer... Зараз відповім...")
+                        ollama_prompt(prompt="en: " + prmpt, history=history)
+            print("\n", "\n")
+
+    except KeyboardInterrupt:
+        listening = False
+        print("\n", "Stopped listening.\t Перервано.")
+        print("\n", "\n")
+        print("HISTORY:", history)
+        # The history can also be saved for later reuse
+        # Specify the file path and name
+        history_path = "./data/HISTORY.json"
+
+        # Serialize the list to JSON
+        json_data = json.dumps(history)
+
+        # Write the JSON data to the file
+        with open(history_path, "w", encoding="utf-8") as file:
+            file.write(json_data)
+
+
+if __name__ == "__main__":
+    main(memory=True)
diff --git a/src/english_stt.py b/src/english_stt.py
new file mode 100644
index 0000000..528a292
--- /dev/null
+++ b/src/english_stt.py
@@ -0,0 +1,26 @@
+"""Module providing English speech recognition"""
+
+import speech_recognition as sr
+
+
+def en_transcribe(wav_filename="./data/wav/EN_test.wav"):
+    """Transcribe/recognize an English WAV file."""
+    recognizer = sr.Recognizer()
+
+    # Open the audio file and recognize it
+    with sr.AudioFile(wav_filename) as source:
+        audio_data = recognizer.record(source)
+        try:
+            return recognizer.recognize_google(audio_data)
+        except sr.UnknownValueError:
+            return "Didn't recognize that."
+ except sr.RequestError as e: + return f"Could not request results; {e}" + + +# Example usage +if __name__ == "__main__": + SAMPLE = "./data/wav/EN_test.wav" + response = en_transcribe(SAMPLE) + print() + print("Response: ", response) diff --git a/src/identify_lang.py b/src/identify_lang.py new file mode 100644 index 0000000..e24e9e9 --- /dev/null +++ b/src/identify_lang.py @@ -0,0 +1,35 @@ +"Python" +import warnings +from speechbrain.inference.classifiers import EncoderClassifier +from torch.cuda import is_available + +# Suppress warnings +warnings.filterwarnings("ignore") + + +def identify_language(wav_filename="./data/wav/UA_test.wav") -> list: + """ + Identifies the language of an audio file using a pre-trained language identification model. + + Args: + wav_filename (str, optional): Path to the audio file. Defaults to "UA_test.wav". + + Returns: + list: Predicted language ID. + """ + # Load the pre-trained language identification model + language_id = EncoderClassifier.from_hparams( + source="speechbrain/lang-id-voxlingua107-ecapa", + savedir="./data/model_data/", + run_opts="cuda" if is_available() else None, + ) + # Load the audio file + signal = language_id.load_audio(wav_filename, savedir="./data/") + # Classify the audio file + prediction = language_id.classify_batch(signal) + # Return the predicted language ID + return prediction[3] + + +if __name__ == "__main__": + print(identify_language()) diff --git a/src/ollama_tts.py b/src/ollama_tts.py new file mode 100644 index 0000000..0f69e44 --- /dev/null +++ b/src/ollama_tts.py @@ -0,0 +1,69 @@ +"Run terminal, write to it and get text&audio response" + +import sys + +import asyncio +from ollama import chat + +sys.path.append("./") +from src.transcribe_speak import transcribe_and_speak +from config import SYS_MSG + +HISTORY = [{"role": "system", "content": SYS_MSG}] + + +def ollama_prompt(prompt: str = None, model="llama3.1", history: list = None) -> list: + """ + Sends a prompt to the Ollama LLM and interacts with the user in a continuous loop. + + Args: + prompt (str, optional): User's input prompt. Defaults to None. + model (str, optional): The Ollama model to use. Defaults to "llama3.1". + history (list, optional): A list of dictionaries representing the conversation history.\ + Defaults to None. 
+ + Returns: + None + """ + history.append({"role": "user", "content": prompt}) + stream = chat(model=model, messages=history, stream=True) + + sentence_chunks, response_text = "", "" + print("Assistant: ", end="") + + try: + # Process each chunk in the LLM's response stream + for part in stream: # not bug but feature + print(part["message"]["content"], end="", flush=True) + content = part["message"]["content"] + sentence_chunks += content + response_text += content + + # Check for sentence ending and perform language detection and text-to-speech + if sentence_chunks.endswith(("**:", ".", "!", "?", '?"', '!"', ":", ")")): + if any("\u0400" <= char <= "\u04FF" for char in sentence_chunks): + lang = "ua" + else: + lang = "en" + asyncio.run(transcribe_and_speak(text=sentence_chunks, lang=lang)) + sentence_chunks = "" + + # Add the final response and updated history to conversation history + history.append({"role": "assistant", "content": response_text}) + + except KeyboardInterrupt: + history.append({"role": "assistant", "content": response_text}) + return print("") + + +if __name__ == "__main__": + try: + TALKING = True + while TALKING: + print("User:", end="") + ollama_prompt(prompt=input(), model="llama3.1", history=HISTORY) + print() + except KeyboardInterrupt: + TALKING = False + print() + print("HISTORY:", HISTORY) diff --git a/src/transcribe_speak.py b/src/transcribe_speak.py new file mode 100644 index 0000000..f12d1c1 --- /dev/null +++ b/src/transcribe_speak.py @@ -0,0 +1,52 @@ +"""Text-To-Speech Interface with Pygame Audio Playback + +This module provides a function (`tts`) to convert text to speech using Edge TTS +and play the generated audio using Pygame. It supports English (`en`) and Ukrainian +(`uk`, `ua`) languages (for now). +""" + +from os import environ +import asyncio +import edge_tts + +environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide" # must be before import pygame +import pygame + + +async def transcribe_and_speak(text="Nice brackets, John!", lang="en") -> pygame.mixer.music: + """ + Converts text to speech using Edge TTS and plays the generated audio. + + Args: + text (str, optional): The text to be spoken. Defaults to "Nice brackets, John!". + lang (str, optional): The language for the TTS voice. Defaults to "en" (English). + - Supported languages: "en" (English), "uk" (Ukrainian), "ua" (Ukrainian) + + Raises: + ValueError: If the provided language is not supported. + """ + if lang in ["en"]: + voice = "en-GB-SoniaNeural" + elif lang in ["uk", "ua"]: + voice = "uk-UA-PolinaNeural" + else: + raise ValueError(f"Unsupported language: {lang}") + output_file = "./data/wav/transcribed_piece.mp3" + # Generate TTS audio + communicate = edge_tts.Communicate(text=text, voice=voice, rate="+30%") + await communicate.save(output_file) + + # Play the audio file + pygame.mixer.init() + pygame.mixer.music.load(output_file) + pygame.mixer.music.play() + + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(1) + + +if __name__ == "__main__": + PROMPT = "I'd be happy to try singing for you." 
+    asyncio.run(transcribe_and_speak(text=PROMPT, lang="en"))
+    PROMPT = "Я б залюбки для тебе заспівала"
+    asyncio.run(transcribe_and_speak(text=PROMPT, lang="ua"))
diff --git a/src/ukrainian_stt.py b/src/ukrainian_stt.py
new file mode 100644
index 0000000..98411a4
--- /dev/null
+++ b/src/ukrainian_stt.py
@@ -0,0 +1,57 @@
+"Ukrainian speech-to-text converter based on the Wav2Vec2-Bert architecture"
+import soundfile as sf
+import torch
+from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
+from transformers.utils.logging import set_verbosity_error
+
+set_verbosity_error()
+
+
+def ua_transcribe(
+    file_paths="./data/wav/UA_test.wav",
+    model_name="Yehor/w2v-bert-2.0-uk",
+    device="cuda:0" if torch.cuda.is_available() else "cpu",
+    sampling_rate=16000,
+) -> str:
+    """Transcribes Ukrainian audio using Wav2Vec2-Bert.
+
+    Args:
+        file_paths: Audio file path(s). Defaults to "./data/wav/UA_test.wav".
+        model_name: Pre-trained model name. Defaults to "Yehor/w2v-bert-2.0-uk".
+        device: Device for computation. Defaults to "cuda:0" if available, otherwise "cpu".
+        sampling_rate: Audio sampling rate. Defaults to 16000.
+
+    Returns:
+        Transcribed text.
+
+    Raises:
+        Exception: On errors.
+    """
+
+    # Load the Wav2Vec2-Bert model and processor
+    asr_model = AutoModelForCTC.from_pretrained(model_name).to(device)
+    processor = Wav2Vec2BertProcessor.from_pretrained(model_name)
+
+    # Extract audio data from the provided file paths
+    audio_inputs = []
+    file_paths = [file_paths]  # Ensure file_paths is a list
+    for path in file_paths:
+        audio_input, _ = sf.read(path)
+        audio_inputs.append(audio_input)
+
+    # Preprocess the audio for model input
+    inputs = processor(audio_inputs, sampling_rate=sampling_rate).input_features
+    features = torch.tensor(inputs).to(device)
+
+    # Perform audio transcription with no gradient calculation
+    with torch.no_grad():
+        logits = asr_model(features).logits
+
+    # Decode the predicted token IDs to text
+    predicted_ids = torch.argmax(logits, dim=-1)
+    prdct = processor.batch_decode(predicted_ids)[0]
+    return prdct
+
+
+if __name__ == "__main__":
+    print(ua_transcribe())
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..f4a463f
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,46 @@
+"""Helpers: convert recorded stereo audio to a mono 16 kHz WAV and detect the prompt language"""
+
+import re
+import speech_recognition as sr
+
+
+def convert_audio_to_wav(audio_file, output_file) -> None:
+    """Converts an audio file to a 16000 Hz WAV file.
+
+    Args:
+        audio_file: Path to the input audio file.
+        output_file: Path to the output WAV file.
+    """
+
+    recognizer = sr.Recognizer()
+    recognizer.pause_threshold = 0.8
+
+    with sr.AudioFile(audio_file) as source:
+        audio = recognizer.record(source)
+
+    wav_data = audio.get_wav_data(convert_rate=16000)
+
+    with open(output_file, "wb") as file:
+        file.write(wav_data)
+
+
+def check_language(user_prompt):
+    """Checks whether the user prompt contains EN characters and assigns a language prefix accordingly.
+    Args:
+        user_prompt (str): The user-provided prompt.
+    Returns:
+        str: The user prompt with a language prefix ("en:" or "ua:") based on the presence\
+        of English characters.
+    """
+    english_pattern = re.compile(r"[a-zA-Z]")
+    # cyrillic_pattern = re.compile(r"[\u0400-\u04FF]")
+    if english_pattern.search(user_prompt):
+        return "en: " + user_prompt
+    else:
+        return "ua: " + user_prompt
+
+
+if __name__ == "__main__":
+    # Example usage:
+    SAMPLE = "Краще напишу/I'll write instead"
+    RESULT = check_language(SAMPLE)
+    print(RESULT)  # Output: en: Краще напишу/I'll write instead
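The nested-list membership checks in main.py and app.py work because identify_language() returns the raw text_lab field of speechbrain's classify_batch output, i.e. a one-element list of label strings such as ["uk: Ukrainian"]. A minimal sketch of that routing convention, assuming the label format used above (the helper name route_transcription is illustrative and not part of this diff):

# Illustrative sketch only; assumes the voxlingua107 label strings used above.
from src.identify_lang import identify_language
from src.ukrainian_stt import ua_transcribe
from src.english_stt import en_transcribe

UA_ROUTED_LABELS = [["uk: Ukrainian"], ["pl: Polish"], ["ru: Russian"], ["be: Belarusian"]]


def route_transcription(wav_file: str) -> str:
    """Pick the Ukrainian or English recognizer based on the detected language label."""
    label = identify_language(wav_file)  # e.g. ["uk: Ukrainian"]
    if label in UA_ROUTED_LABELS:  # whole-list comparison, as in main.py and app.py
        return "ua: " + ua_transcribe(wav_file)
    return "en: " + en_transcribe(wav_file)


if __name__ == "__main__":
    print(route_transcription("./data/wav/UA_test.wav"))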