diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f0dc1ee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.conda
+.vscode
+__pycache__
+data/model_data
+data/HISTORY.json
+flagged
+src/__pycache__
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b882d6d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,48 @@
+# UA-EN Voice Assistant
+## No API keys | local | llama3.1 (12k-token prompt menu: ~20 pages in a single request)
+![GitHub](https://img.shields.io/github/license/Katashynskyi/voice_assistant_UA_EN)
+![GitHub last commit](https://img.shields.io/github/last-commit/Katashynskyi/voice_assistant_UA_EN)
+![Gif](data/media/gif.gif)
+
+
+[![GUI version](data/media/GUI_V.png)](https://youtu.be/iw9P4Y7KXI4)
+[![Console version](data/media/CONSOLE_V.png)](https://youtu.be/c-8Z4qzOcII)
+
+## The Idea
+This project is a proof of concept for a minimum viable product (MVP) inspired by the capabilities of ChatGPT's omni model. It offers one significant advantage: local deployment without restrictions. Users can apply it to a range of purposes, including:
+- Translation across languages
+- Learning enhancement through writing, reading, and listening practice
+- Customization for tailored use cases
+
+## Features & Tech stack
+
+- **Language classification**: detects whether speech is UA or EN for the automatic mode, using speechbrain's "lang-id-voxlingua107-ecapa" (supports 100+ languages).
+- **Google legacy recognizer**: uses a generic key that works out of the box; fast and reliable.
+- **Wav2Vec2-Bert**: currently the best Ukrainian speech-to-text converter.
+- **Edge-TTS**: the most natural-sounding voices available for free.
+- **Ollama-python**: library for downloading and running the most popular LLMs.
+- **Streamlit**: for the GUI.
+- **Dialogue saved in JSON**: HISTORY.json (main.py only; app.py keeps only short-term context-window memory).
+- **config.py**: the system prompt for the best user experience (modify it for your own purposes).
+
+
+## Getting Started
+### Tested on
+- WSL (Ubuntu 22.04.3)
+- GeForce GTX 1050 Ti mobile (4 GB)
+- 32 GB RAM
+### Prerequisites
+- Python 3.9+
+- Virtual environment (Conda, Python 3.9+)
+- CUDA (optional)
+### Installation
+
+- Clone the repository
+- Create a conda environment (Python 3.9)
+- Install PortAudio: sudo apt install portaudio19-dev
+- Install the required packages: pip install -r requirements.txt
+
+## Usage
+
+After installing the required libraries, run main.py for the console experience or app.py for the GUI.
+
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..db822f1
--- /dev/null
+++ b/app.py
@@ -0,0 +1,184 @@
+"""Python script for a voice assistant using Streamlit and Ollama
+
+This script creates a user interface using Streamlit to interact with a large language model\
+ (LLM) from Ollama for voice-based and text-based communication.
+
+Features:
+    - Audio recording using Streamlit's `audio_recorder` component.
+    - Speech recognition for Ukrainian (UA) and English (EN) using custom functions.
+    - Automatic language detection based on transcribed text.
+    - Text input for user prompts.
+    - Streamlit chat interface for displaying conversation history.
+    - Interaction with Ollama's LLM for generating responses.
+    - Text-to-speech playback of responses via Edge TTS (see src/transcribe_speak.py).
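+
+Assumed launch workflow: an Ollama server must be running locally with the llama3.1 model
+pulled (`ollama pull llama3.1`); the UI itself is then started with `streamlit run app.py`.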
+""" + +import warnings +import asyncio +import ollama +import streamlit as st +from audio_recorder_streamlit import audio_recorder +from src.ukrainian_stt import ua_transcribe +from src.english_stt import en_transcribe +from src.transcribe_speak import transcribe_and_speak +from src.utils import convert_audio_to_wav, check_language +from src.identify_lang import identify_language +from config import SYS_MSG + +# Suppress warnings +warnings.filterwarnings("ignore") + +# File paths +RECORDED_WAV_FILE = "./data/wav/microphone_stereo.wav" +CONV_WAV_FILE = "./data/wav/converted_mono.wav" +WAV_FILE = "./data/wav/chunk.wav" + +# Initial conversation history +HISTORY = [{"role": "system", "content": SYS_MSG}] + +# Streamlit page configuration +st.set_page_config( + page_title="Voice assistant UA-EN", + page_icon=":trident:", + layout="wide", + initial_sidebar_state="auto", + menu_items=None, +) + + +def ollama_prompt(model="llama3.1", messages=None): + """ + Sends a prompt to the Ollama LLM and returns a stream of responses. + + Args: + model (str, optional): The Ollama model to use. Defaults to "llama3.1". + messages (list, optional): A list of dictionaries representing the conversation history. + Defaults to None. + + Returns: + stream: An asynchronous stream of dictionaries containing the LLM's responses. + """ + stream = ollama.chat(model=model, messages=messages, stream=True) + return stream + + +def stream_parser(stream): + """ + Parses the stream of responses from the LLM and displays them in the Streamlit chat interface. + + Args: + stream: An asynchronous stream of dictionaries containing the LLM's responses. + + Yields: + str: Each chunk of the LLM's response. + """ + sentence_chunks, response_text = "", "" + st.session_state.messages.append({"role": "assistant", "content": response_text}) + print("Assistant: ", end="") + for chunk in stream: + print(chunk["message"]["content"], end="", flush=True) + content = chunk["message"]["content"] + sentence_chunks += content + response_text += content + st.session_state.messages[-1]["content"] += content + if sentence_chunks.endswith( + ('."', "\n\n", "**:", ".", "!", "?", '?"', '!"', ":") + ): + if any("\u0400" <= char <= "\u04FF" for char in sentence_chunks): + lang = "ua" + else: + lang = "en" + asyncio.run(transcribe_and_speak(text=sentence_chunks, lang=lang)) + sentence_chunks = "" + yield chunk["message"]["content"] + + +def stop_running(): + """The Dummy.Currently does nothing.""" + with my_slot1.chat_message("user"): + st.markdown("Stop!") + +# Setup order of elements +my_slot0 = st.empty() # most buttons +my_slot1 = st.empty() # chat_message("user") +my_slot2 = st.empty() # chat_message(message["role"] & chat_message("assistant") + + +# Streamlit custom microphone +col1, col2 = my_slot0.columns([1, 8.5], vertical_alignment="bottom") +with col1: + audio_bytes = audio_recorder( + text="", energy_threshold=0.01, icon_size="5x" + ) # if energy_threshold negative - never stops +if audio_bytes is not None and len(audio_bytes) != 44: + st.audio(audio_bytes, format="audio/wav") + with open(file=RECORDED_WAV_FILE, mode="wb") as f: + f.write(audio_bytes) + f.close() + convert_audio_to_wav(audio_file=RECORDED_WAV_FILE, output_file=CONV_WAV_FILE) +# Choose language buttons +with col2: + PRMPT = None + button0, button1, button2, button3 = st.columns(4) + with button0: + if st.button("Stop", use_container_width=True, type="primary"): + stop_running() + with button1: + if st.button("Говорю (UA)", use_container_width=True): + PRMPT = "ua:" + 
ua_transcribe(CONV_WAV_FILE)
+            print(PRMPT)
+    with button2:
+        if st.button("Talking (EN)", use_container_width=True):
+            PRMPT = "en:" + en_transcribe(CONV_WAV_FILE)
+            print(PRMPT)
+    with button3:
+        if st.button("Automatic", use_container_width=True):
+            DEF_LANG = "???"
+            DEF_LANG = identify_language(CONV_WAV_FILE)
+            if DEF_LANG in [
+                ["uk: Ukrainian"],
+                ["pl: Polish"],
+                ["ru: Russian"],
+                ["be: Belarusian"],
+            ]:
+                PRMPT = "ua:" + ua_transcribe(CONV_WAV_FILE)
+                print(PRMPT)
+            else:
+                PRMPT = "en:" + en_transcribe(CONV_WAV_FILE)
+                if PRMPT == "Didn't recognize that.":
+                    print(PRMPT)
+                    PRMPT = None
+                else:
+                    print(PRMPT)
+user_prompt = st.chat_input(placeholder="Краще напишу/I'll write instead")
+if user_prompt is not None:
+    user_prompt = check_language(user_prompt=user_prompt)
+# Check for existing messages in session state
+if "messages" not in st.session_state:
+    st.session_state.messages = HISTORY
+
+# Display chat messages from session state
+for message in st.session_state.messages:
+    with my_slot2.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+if user_prompt is not None or PRMPT is not None:
+    # Display the user prompt in a chat message widget
+    with my_slot1.chat_message("user"):
+        print()
+        print("User:", end="")
+        print(user_prompt or PRMPT)
+        st.markdown(user_prompt or PRMPT)
+
+    # Add the user's prompt to session state
+    st.session_state.messages.append({"role": "user", "content": user_prompt or PRMPT})
+
+    # Retrieve the response from the model
+    LLM_STREAM = ollama_prompt(
+        messages=st.session_state.messages,
+    )
+    with my_slot2.chat_message("assistant"):
+        try:
+            st.write(stream_parser(LLM_STREAM))
+        except Exception:
+            # Streaming was interrupted; stop_running() is currently a placeholder
+            stop_running()
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..6ce38da
--- /dev/null
+++ b/config.py
@@ -0,0 +1,11 @@
+"""Instructions for what the LLM must do; rewrite them as much as you wish."""
+
+SYS_MSG = "You are a helpful AI voice assistant (your name is Bot). Generate the most useful \
+and factual response possible, carefully considering all previously generated text in your response \
+before adding new tokens to the response. Use all of the context of this conversation so your \
+response is relevant to the conversation. Make your responses clear and concise, avoiding any \
+verbosity. You'll mostly be asked questions in English or Ukrainian. It's mandatory to avoid mixing languages within a single \
+sentence. You can provide your response entirely in English or entirely in Ukrainian. \
+If you need to use words from both languages in the same sentence, consider transliterating \
+one of them (especially names!). 
If the prompt contains 'ua:', answer in Ukrainian; if it contains 'en:', answer in English \
+(in both cases, use transliteration if needed)."
diff --git a/data/chunk copy.wav b/data/chunk copy.wav
new file mode 100644
index 0000000..af56144
--- /dev/null
+++ b/data/chunk copy.wav
@@ -0,0 +1 @@
+This is some sample data
\ No newline at end of file
diff --git a/data/chunk.wav b/data/chunk.wav
new file mode 120000
index 0000000..9f87985
--- /dev/null
+++ b/data/chunk.wav
@@ -0,0 +1 @@
+/home/mr/repositories/VOICE_ASSISTANT/data/wav/chunk.wav
\ No newline at end of file
diff --git a/data/converted_mono.wav b/data/converted_mono.wav
new file mode 120000
index 0000000..7271a1f
--- /dev/null
+++ b/data/converted_mono.wav
@@ -0,0 +1 @@
+/home/mr/repositories/VOICE_ASSISTANT/data/wav/converted_mono.wav
\ No newline at end of file
diff --git a/data/media/CONSOLE_V.png b/data/media/CONSOLE_V.png
new file mode 100644
index 0000000..bd4c0e0
Binary files /dev/null and b/data/media/CONSOLE_V.png differ
diff --git a/data/media/GUI_V.png b/data/media/GUI_V.png
new file mode 100644
index 0000000..46c297f
Binary files /dev/null and b/data/media/GUI_V.png differ
diff --git a/data/media/gif.gif b/data/media/gif.gif
new file mode 100644
index 0000000..1123167
Binary files /dev/null and b/data/media/gif.gif differ
diff --git a/data/wav/EN_test.wav b/data/wav/EN_test.wav
new file mode 100644
index 0000000..96c8918
Binary files /dev/null and b/data/wav/EN_test.wav differ
diff --git a/data/wav/UA_test.wav b/data/wav/UA_test.wav
new file mode 100644
index 0000000..a420ba5
Binary files /dev/null and b/data/wav/UA_test.wav differ
diff --git a/data/wav/chunk.wav b/data/wav/chunk.wav
new file mode 100644
index 0000000..ca111cb
Binary files /dev/null and b/data/wav/chunk.wav differ
diff --git a/data/wav/converted_mono.wav b/data/wav/converted_mono.wav
new file mode 100644
index 0000000..4fd3ad8
Binary files /dev/null and b/data/wav/converted_mono.wav differ
diff --git a/data/wav/microphone_stereo.wav b/data/wav/microphone_stereo.wav
new file mode 100644
index 0000000..1bf67f1
Binary files /dev/null and b/data/wav/microphone_stereo.wav differ
diff --git a/data/wav/transcribed_piece.mp3 b/data/wav/transcribed_piece.mp3
new file mode 100644
index 0000000..685c30e
Binary files /dev/null and b/data/wav/transcribed_piece.mp3 differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..bdaa050
--- /dev/null
+++ b/main.py
@@ -0,0 +1,107 @@
+"""Main.py provides the entry point to a terminal-based voice agent with memory and speech abilities \
+in UA and EN voices. With this configuration it can be used for practising English or Ukrainian, \
+both writing and listening skills."""
+
+import json
+import sounddevice  # fixing ALSA errors
+import speech_recognition as sr
+from src.english_stt import en_transcribe
+from src.identify_lang import identify_language
+from src.ollama_tts import ollama_prompt
+from src.ukrainian_stt import ua_transcribe
+from config import SYS_MSG
+
+
+def main(memory=False):
+    """
+    The main entry point for the application.
+
+    Args:
+        memory (bool, optional): Whether to load and save conversation history. Defaults to False.
+
+    Returns:
+        None
+
+    Automatically detects your language, transcribes it, and pronounces & writes the answer in UA or EN.
+    Optionally saves the conversation history to a JSON file.
+    """
+    if memory is True:
+        try:
+            with open("data/HISTORY.json", "r", encoding="UTF-8") as f:
+                history = json.load(f)
+        except FileNotFoundError:
+            print(
+                "No previous history is available, creating a new one.",
+                "\t",
+                "Попередньої розмови не знайдено, створюю розмову.",
+            )
+            history = [{"role": "system", "content": SYS_MSG}]
+    else:
+        # Without persistent memory, start from the system prompt only
+        history = [{"role": "system", "content": SYS_MSG}]
+    try:
+        listening = True
+        while listening:
+            with sr.Microphone() as source:
+                recognizer = sr.Recognizer()
+                recognizer.pause_threshold = 0.8
+                recognizer.energy_threshold = 500
+                recognizer.adjust_for_ambient_noise(source)  # , duration=1)
+                # recognizer.dynamic_energy_threshold = 3000
+
+                def_lang = "???"
+                try:
+                    print("Listening...\t\t Слухаю...")
+                    audio = recognizer.listen(source, timeout=5.0)
+                    print("Working on it...\t Обробка...")
+                    # Save the audio data to a WAV file
+                    wav_data = audio.get_wav_data(convert_rate=16000)
+                    # Write the WAV data to a file
+                    wav_file = "./data/wav/chunk.wav"
+                    with open(wav_file, "wb") as file:
+                        file.write(wav_data)
+                except (sr.UnknownValueError, sr.WaitTimeoutError):
+                    print(
+                        "Could you repeat, please? That wasn't recognized.",
+                        "\t\t",
+                        "Повторіть будь ласка, не розчула",
+                    )
+                    continue  # nothing was captured, so listen again
+                def_lang = identify_language(wav_file)
+                if def_lang in [
+                    ["uk: Ukrainian"],
+                    ["pl: Polish"],
+                    ["ru: Russian"],
+                    ["be: Belarusian"],
+                ]:
+                    print("Запит Солов'їною, обробка...")
+                    prmpt = ua_transcribe(wav_file)
+                    print("Користувач:", prmpt)
+                    print("Дай подумати...")
+                    ollama_prompt(prompt="ua: " + prmpt, history=history)
+                else:
+                    print("Detected as English, working on it...")
+                    prmpt = en_transcribe(wav_file)
+                    if prmpt == "Didn't recognize that.":
+                        print("Didn't recognize that.\t\t Не зрозуміла.")
+                    else:
+                        print("User:", prmpt)
+                        print("Wait for the LLM to answer... Зараз відповім...")
+                        ollama_prompt(prompt="en: " + prmpt, history=history)
+            print("\n", "\n")
+
+    except KeyboardInterrupt:
+        listening = False
+        print("\n", "Stopped listening.\t Перервано.")
+        print("\n", "\n")
+        print("HISTORY:", history)
+        # The history can also be saved for later reuse
+        # Specify the file path and name
+        history_path = "./data/HISTORY.json"
+
+        # Serialize the list to JSON
+        json_data = json.dumps(history)
+
+        # Write the JSON data to the file
+        with open(history_path, "w", encoding="utf-8") as file:
+            file.write(json_data)
+
+
+if __name__ == "__main__":
+    main(memory=True)
diff --git a/src/english_stt.py b/src/english_stt.py
new file mode 100644
index 0000000..528a292
--- /dev/null
+++ b/src/english_stt.py
@@ -0,0 +1,26 @@
+"""Module providing English speech recognition"""
+
+import speech_recognition as sr
+
+
+def en_transcribe(wav_filename="./data/wav/EN_test.wav"):
+    """Transcribe/recognize an English WAV file."""
+    recognizer = sr.Recognizer()
+
+    # Open the audio file and recognize it
+    with sr.AudioFile(wav_filename) as source:
+        audio_data = recognizer.record(source)
+        try:
+            return recognizer.recognize_google(audio_data)
+        except sr.UnknownValueError:
+            return "Didn't recognize that."
+ except sr.RequestError as e: + return f"Could not request results; {e}" + + +# Example usage +if __name__ == "__main__": + SAMPLE = "./data/wav/EN_test.wav" + response = en_transcribe(SAMPLE) + print() + print("Response: ", response) diff --git a/src/identify_lang.py b/src/identify_lang.py new file mode 100644 index 0000000..e24e9e9 --- /dev/null +++ b/src/identify_lang.py @@ -0,0 +1,35 @@ +"Python" +import warnings +from speechbrain.inference.classifiers import EncoderClassifier +from torch.cuda import is_available + +# Suppress warnings +warnings.filterwarnings("ignore") + + +def identify_language(wav_filename="./data/wav/UA_test.wav") -> list: + """ + Identifies the language of an audio file using a pre-trained language identification model. + + Args: + wav_filename (str, optional): Path to the audio file. Defaults to "UA_test.wav". + + Returns: + list: Predicted language ID. + """ + # Load the pre-trained language identification model + language_id = EncoderClassifier.from_hparams( + source="speechbrain/lang-id-voxlingua107-ecapa", + savedir="./data/model_data/", + run_opts="cuda" if is_available() else None, + ) + # Load the audio file + signal = language_id.load_audio(wav_filename, savedir="./data/") + # Classify the audio file + prediction = language_id.classify_batch(signal) + # Return the predicted language ID + return prediction[3] + + +if __name__ == "__main__": + print(identify_language()) diff --git a/src/ollama_tts.py b/src/ollama_tts.py new file mode 100644 index 0000000..0f69e44 --- /dev/null +++ b/src/ollama_tts.py @@ -0,0 +1,69 @@ +"Run terminal, write to it and get text&audio response" + +import sys + +import asyncio +from ollama import chat + +sys.path.append("./") +from src.transcribe_speak import transcribe_and_speak +from config import SYS_MSG + +HISTORY = [{"role": "system", "content": SYS_MSG}] + + +def ollama_prompt(prompt: str = None, model="llama3.1", history: list = None) -> list: + """ + Sends a prompt to the Ollama LLM and interacts with the user in a continuous loop. + + Args: + prompt (str, optional): User's input prompt. Defaults to None. + model (str, optional): The Ollama model to use. Defaults to "llama3.1". + history (list, optional): A list of dictionaries representing the conversation history.\ + Defaults to None. 
+ + Returns: + None + """ + history.append({"role": "user", "content": prompt}) + stream = chat(model=model, messages=history, stream=True) + + sentence_chunks, response_text = "", "" + print("Assistant: ", end="") + + try: + # Process each chunk in the LLM's response stream + for part in stream: # not bug but feature + print(part["message"]["content"], end="", flush=True) + content = part["message"]["content"] + sentence_chunks += content + response_text += content + + # Check for sentence ending and perform language detection and text-to-speech + if sentence_chunks.endswith(("**:", ".", "!", "?", '?"', '!"', ":", ")")): + if any("\u0400" <= char <= "\u04FF" for char in sentence_chunks): + lang = "ua" + else: + lang = "en" + asyncio.run(transcribe_and_speak(text=sentence_chunks, lang=lang)) + sentence_chunks = "" + + # Add the final response and updated history to conversation history + history.append({"role": "assistant", "content": response_text}) + + except KeyboardInterrupt: + history.append({"role": "assistant", "content": response_text}) + return print("") + + +if __name__ == "__main__": + try: + TALKING = True + while TALKING: + print("User:", end="") + ollama_prompt(prompt=input(), model="llama3.1", history=HISTORY) + print() + except KeyboardInterrupt: + TALKING = False + print() + print("HISTORY:", HISTORY) diff --git a/src/transcribe_speak.py b/src/transcribe_speak.py new file mode 100644 index 0000000..f12d1c1 --- /dev/null +++ b/src/transcribe_speak.py @@ -0,0 +1,52 @@ +"""Text-To-Speech Interface with Pygame Audio Playback + +This module provides a function (`tts`) to convert text to speech using Edge TTS +and play the generated audio using Pygame. It supports English (`en`) and Ukrainian +(`uk`, `ua`) languages (for now). +""" + +from os import environ +import asyncio +import edge_tts + +environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide" # must be before import pygame +import pygame + + +async def transcribe_and_speak(text="Nice brackets, John!", lang="en") -> pygame.mixer.music: + """ + Converts text to speech using Edge TTS and plays the generated audio. + + Args: + text (str, optional): The text to be spoken. Defaults to "Nice brackets, John!". + lang (str, optional): The language for the TTS voice. Defaults to "en" (English). + - Supported languages: "en" (English), "uk" (Ukrainian), "ua" (Ukrainian) + + Raises: + ValueError: If the provided language is not supported. + """ + if lang in ["en"]: + voice = "en-GB-SoniaNeural" + elif lang in ["uk", "ua"]: + voice = "uk-UA-PolinaNeural" + else: + raise ValueError(f"Unsupported language: {lang}") + output_file = "./data/wav/transcribed_piece.mp3" + # Generate TTS audio + communicate = edge_tts.Communicate(text=text, voice=voice, rate="+30%") + await communicate.save(output_file) + + # Play the audio file + pygame.mixer.init() + pygame.mixer.music.load(output_file) + pygame.mixer.music.play() + + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(1) + + +if __name__ == "__main__": + PROMPT = "I'd be happy to try singing for you." 
+    asyncio.run(transcribe_and_speak(text=PROMPT, lang="en"))
+    PROMPT = "Я б залюбки для тебе заспівала"
+    asyncio.run(transcribe_and_speak(text=PROMPT, lang="ua"))
diff --git a/src/ukrainian_stt.py b/src/ukrainian_stt.py
new file mode 100644
index 0000000..98411a4
--- /dev/null
+++ b/src/ukrainian_stt.py
@@ -0,0 +1,57 @@
+"Ukrainian speech-to-text converter based on the Wav2Vec2-Bert architecture"
+import soundfile as sf
+import torch
+from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
+from transformers.utils.logging import set_verbosity_error
+
+set_verbosity_error()
+
+
+def ua_transcribe(
+    file_paths="./data/wav/UA_test.wav",
+    model_name="Yehor/w2v-bert-2.0-uk",
+    device="cuda:0" if torch.cuda.is_available() else "cpu",
+    sampling_rate=16000,
+) -> str:
+    """Transcribes Ukrainian audio using Wav2Vec2-Bert.
+
+    Args:
+        file_paths: Audio file path(s). Defaults to "./data/wav/UA_test.wav".
+        model_name: Pre-trained model name. Defaults to "Yehor/w2v-bert-2.0-uk".
+        device: Device for computation. Defaults to "cuda:0" if available, otherwise "cpu".
+        sampling_rate: Audio sampling rate. Defaults to 16000.
+
+    Returns:
+        Transcribed text.
+
+    Raises:
+        Exception: On errors.
+    """
+
+    # Load the Wav2Vec2-Bert model and processor
+    asr_model = AutoModelForCTC.from_pretrained(model_name).to(device)
+    processor = Wav2Vec2BertProcessor.from_pretrained(model_name)
+
+    # Extract audio data from the provided file paths
+    audio_inputs = []
+    file_paths = [file_paths]  # Ensure file_paths is a list
+    for path in file_paths:
+        audio_input, _ = sf.read(path)
+        audio_inputs.append(audio_input)
+
+    # Preprocess the audio for model input
+    inputs = processor(audio_inputs, sampling_rate=sampling_rate).input_features
+    features = torch.tensor(inputs).to(device)
+
+    # Perform audio transcription with no gradient calculation
+    with torch.no_grad():
+        logits = asr_model(features).logits
+
+    # Decode the predicted token IDs to text
+    predicted_ids = torch.argmax(logits, dim=-1)
+    prdct = processor.batch_decode(predicted_ids)[0]
+    return prdct
+
+
+if __name__ == "__main__":
+    print(ua_transcribe())
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..f4a463f
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,46 @@
+"""Helpers: convert recorded stereo audio to a mono 16 kHz WAV and detect the prompt language"""
+
+import re
+import speech_recognition as sr
+
+
+def convert_audio_to_wav(audio_file, output_file) -> None:
+    """Converts an audio file to a 16000 Hz WAV file.
+
+    Args:
+        audio_file: Path to the input audio file.
+        output_file: Path to the output WAV file.
+    """
+
+    recognizer = sr.Recognizer()
+    recognizer.pause_threshold = 0.8
+
+    with sr.AudioFile(audio_file) as source:
+        audio = recognizer.record(source)
+
+    wav_data = audio.get_wav_data(convert_rate=16000)
+
+    with open(output_file, "wb") as file:
+        file.write(wav_data)
+
+
+def check_language(user_prompt):
+    """Checks whether the user prompt contains EN characters and assigns a language prefix accordingly.
+    Args:
+        user_prompt (str): The user-provided prompt.
+    Returns:
+        str: The user prompt with a language prefix ("en:" or "ua:") based on the presence\
+        of English characters.
+    """
+    english_pattern = re.compile(r"[a-zA-Z]")
+    # cyrillic_pattern = re.compile(r"[\u0400-\u04FF]")
+    if english_pattern.search(user_prompt):
+        return "en: " + user_prompt
+    else:
+        return "ua: " + user_prompt
+
+
+if __name__ == "__main__":
+    # Example usage:
+    SAMPLE = "Краще напишу/I'll write instead"
+    RESULT = check_language(SAMPLE)
+    print(RESULT)  # Output: en: Краще напишу/I'll write instead
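The nested-list membership checks in main.py and app.py work because identify_language() returns the raw text_lab field of speechbrain's classify_batch output, i.e. a one-element list of label strings such as ["uk: Ukrainian"]. A minimal sketch of that routing convention, assuming the label format used above (the helper name route_transcription is illustrative and not part of this diff):

# Illustrative sketch only; assumes the voxlingua107 label strings used above.
from src.identify_lang import identify_language
from src.ukrainian_stt import ua_transcribe
from src.english_stt import en_transcribe

UA_ROUTED_LABELS = [["uk: Ukrainian"], ["pl: Polish"], ["ru: Russian"], ["be: Belarusian"]]


def route_transcription(wav_file: str) -> str:
    """Pick the Ukrainian or English recognizer based on the detected language label."""
    label = identify_language(wav_file)  # e.g. ["uk: Ukrainian"]
    if label in UA_ROUTED_LABELS:  # whole-list comparison, as in main.py and app.py
        return "ua: " + ua_transcribe(wav_file)
    return "en: " + en_transcribe(wav_file)


if __name__ == "__main__":
    print(route_transcription("./data/wav/UA_test.wav"))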