From 4e324c245bc695deb620b50fbf105d5bcbc37375 Mon Sep 17 00:00:00 2001
From: Gilmm27 <A00839075@tec.mx>
Date: Sun, 19 Jan 2025 15:02:39 -0800
Subject: [PATCH 1/6] mulithread hear service implementation

---
 hri/packages/speech/config/hear.yaml        |  4 +-
 hri/packages/speech/scripts/hear.py         | 69 ++++++++++++++++++---
 hri/packages/speech/scripts/useful_audio.py | 18 +++---
 3 files changed, 72 insertions(+), 19 deletions(-)

diff --git a/hri/packages/speech/config/hear.yaml b/hri/packages/speech/config/hear.yaml
index 82a3720..db0bafb 100644
--- a/hri/packages/speech/config/hear.yaml
+++ b/hri/packages/speech/config/hear.yaml
@@ -1,3 +1,5 @@
 hear:
   ros__parameters:
-    STT_SERVER_IP: "127.0.0.1:50051"
\ No newline at end of file
+    STT_SERVER_IP: "127.0.0.1:50051"
+    START_SERVICE: True
+    detection_publish_topic: "/keyword_detected"
\ No newline at end of file
diff --git a/hri/packages/speech/scripts/hear.py b/hri/packages/speech/scripts/hear.py
index 303d211..820076a 100755
--- a/hri/packages/speech/scripts/hear.py
+++ b/hri/packages/speech/scripts/hear.py
@@ -2,10 +2,12 @@
 
 import rclpy
 from rclpy.node import Node
-from rclpy.executors import ExternalShutdownException
+from rclpy.executors import ExternalShutdownException, MultiThreadedExecutor
+from rclpy.callback_groups import ReentrantCallbackGroup
 import grpc
 from frida_interfaces.msg import AudioData
-from std_msgs.msg import String
+from frida_interfaces.srv import STT
+from std_msgs.msg import Bool, String
 from speech.speech_api_utils import SpeechApiUtils
 
 import sys
@@ -37,12 +39,18 @@ def __init__(self):
         super().__init__("hear_node")
         self.get_logger().info("*Starting Hear Node*")
 
-        # Get the gRPC server address from parameters
         server_ip = (
             self.declare_parameter("STT_SERVER_IP", "127.0.0.1:50051")
             .get_parameter_value()
             .string_value
         )
+        start_service = (
+            self.declare_parameter("START_SERVICE", False)
+            .get_parameter_value()
+            .bool_value
+        )
+
+        # Initialize the Whisper gRPC client
         self.client = WhisperClient(server_ip)
 
         # Create a publisher for the transcriptions
@@ -50,11 +58,37 @@ def __init__(self):
             String, "/speech/raw_command", 10
         )
 
+        # Create groups for the subscription and service
+        multithread_group = ReentrantCallbackGroup()
+
         # Subscribe to audio data
         self.audio_subscription = self.create_subscription(
-            AudioData, "UsefulAudio", self.callback_audio, 10
+            AudioData,
+            "UsefulAudio",
+            self.callback_audio,
+            10,
+            callback_group=multithread_group,
         )
 
+        # Create a service
+        self.service_active = False
+        if start_service:
+            self.service_text = ""
+            detection_publish_topic = (
+                self.declare_parameter("detection_publish_topic", "/keyword_detected")
+                .get_parameter_value()
+                .string_value
+            )
+            self.KWS_publisher_mock = self.create_publisher(
+                Bool, detection_publish_topic, 10
+            )
+            self.stt_service = self.create_service(
+                STT,
+                "stt_service",
+                self.stt_service_callback,
+                callback_group=multithread_group,
+            )
+
         self.get_logger().info("*Hear Node is ready*")
 
     def callback_audio(self, data):
@@ -75,21 +109,42 @@ def callback_audio(self, data):
             # Publish the transcription
             msg = String()
             msg.data = transcription
-            self.transcription_publisher.publish(msg)
-            self.get_logger().info("Transcription published to ROS topic.")
+
+            if self.service_active:
+                # If the service is active, store the transcription
+                self.service_text = transcription
+                self.service_active = False
+            else:
+                # If the service is not active, publish the transcription
+                self.transcription_publisher.publish(msg)
+                self.get_logger().info("Transcription published to ROS topic.")
         except grpc.RpcError as e:
             self.get_logger().error(f"gRPC error: {e.code()}, {e.details()}")
         except Exception as ex:
             self.get_logger().error(f"Error during transcription: {str(ex)}")
 
+    def stt_service_callback(self, request, response):
+        self.get_logger().info("STT service requested")
+        self.service_active = True
+        self.KWS_publisher_mock.publish(Bool(data=True))
+        while self.service_active:
+            pass
+        response.text_heard = self.service_text
+        # self.get_logger().info(f"STT service response: {response.text_heard}")
+        return response
+
 
 def main(args=None):
     rclpy.init(args=args)
+    node = HearNode()
+    executor = MultiThreadedExecutor()
+    executor.add_node(node)
     try:
-        rclpy.spin(HearNode())
+        executor.spin()
     except (ExternalShutdownException, KeyboardInterrupt):
         pass
     finally:
+        node.destroy_node()
         rclpy.shutdown()
 
 
diff --git a/hri/packages/speech/scripts/useful_audio.py b/hri/packages/speech/scripts/useful_audio.py
index a7bba09..06dafeb 100755
--- a/hri/packages/speech/scripts/useful_audio.py
+++ b/hri/packages/speech/scripts/useful_audio.py
@@ -76,7 +76,6 @@ def __init__(self):
         self.timer = None
         self.is_saying = False
         self.audio_state = "None"
-        self.service_active = False
 
         self.publisher = self.create_publisher(AudioData, "UsefulAudio", 20)
         self.audio_state_publisher = self.create_publisher(String, "AudioState", 10)
@@ -89,7 +88,7 @@ def __init__(self):
             AudioData, "rawAudioChunk", self.callback_raw_audio, 10
         )
         self.create_subscription(Bool, "saying", self.callback_saying, 10)
-        self.create_subscription(Bool, "keyword_detected", self.callback_keyword, 10)
+        self.create_subscription(Bool, "/keyword_detected", self.callback_keyword, 10)
 
         if not self.use_silero_vad:
             self.vad = webrtcvad.Vad()
@@ -126,10 +125,9 @@ def build_audio(self, data):
         self.chunk_count += 1
 
     def discard_audio(self):
-        if not self.service_active:
-            self.ring_buffer.clear()
-            self.voiced_frames = None
-            self.chunk_count = 0
+        self.ring_buffer.clear()
+        self.voiced_frames = None
+        self.chunk_count = 0
 
     def publish_audio(self):
         if self.chunk_count > MIN_CHUNKS_AUDIO_LENGTH:
@@ -207,8 +205,7 @@ def vad_collector(self, chunk):
             ):
                 self.triggered = False
                 self.compute_audio_state()
-                if not self.service_active:
-                    self.publish_audio()
+                self.publish_audio()
                 self.timer = None
 
     def callback_raw_audio(self, msg):
@@ -225,9 +222,8 @@ def callback_saying(self, msg):
 
     def callback_keyword(self, msg):
         self.triggered = True
-        if not self.service_active:
-            self.discard_audio()
-            self.compute_audio_state()
+        self.discard_audio()
+        self.compute_audio_state()
 
     def compute_audio_state(self):
         new_state = (

From 3f9ea734842deeb2feecf7e50800c44b1a1c9063 Mon Sep 17 00:00:00 2001
From: Gilmm27 <A00839075@tec.mx>
Date: Sun, 19 Jan 2025 16:15:30 -0800
Subject: [PATCH 2/6] changed to mutually exclusive

---
 hri/packages/speech/scripts/hear.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hri/packages/speech/scripts/hear.py b/hri/packages/speech/scripts/hear.py
index 820076a..987b9e5 100755
--- a/hri/packages/speech/scripts/hear.py
+++ b/hri/packages/speech/scripts/hear.py
@@ -3,7 +3,7 @@
 import rclpy
 from rclpy.node import Node
 from rclpy.executors import ExternalShutdownException, MultiThreadedExecutor
-from rclpy.callback_groups import ReentrantCallbackGroup
+from rclpy.callback_groups import MutuallyExclusiveCallbackGroup
 import grpc
 from frida_interfaces.msg import AudioData
 from frida_interfaces.srv import STT
@@ -59,7 +59,8 @@ def __init__(self):
         )
 
         # Create groups for the subscription and service
-        multithread_group = ReentrantCallbackGroup()
+        subscription_group = MutuallyExclusiveCallbackGroup()
+        service_group = MutuallyExclusiveCallbackGroup()
 
         # Subscribe to audio data
         self.audio_subscription = self.create_subscription(
@@ -67,7 +68,7 @@ def __init__(self):
             "UsefulAudio",
             self.callback_audio,
             10,
-            callback_group=multithread_group,
+            callback_group=subscription_group,
         )
 
         # Create a service
@@ -86,7 +87,7 @@ def __init__(self):
                 STT,
                 "stt_service",
                 self.stt_service_callback,
-                callback_group=multithread_group,
+                callback_group=service_group,
             )
 
         self.get_logger().info("*Hear Node is ready*")
@@ -124,13 +125,12 @@ def callback_audio(self, data):
             self.get_logger().error(f"Error during transcription: {str(ex)}")
 
     def stt_service_callback(self, request, response):
-        self.get_logger().info("STT service requested")
+        self.get_logger().info("Keyword mock service activated, recording audio...")
         self.service_active = True
         self.KWS_publisher_mock.publish(Bool(data=True))
         while self.service_active:
             pass
         response.text_heard = self.service_text
-        # self.get_logger().info(f"STT service response: {response.text_heard}")
         return response
 
 

From fefc2d0db40a7bd0d49526bee7d9c30fa75b7e18 Mon Sep 17 00:00:00 2001
From: Gilmm27 <A00839075@tec.mx>
Date: Mon, 20 Jan 2025 20:34:40 -0800
Subject: [PATCH 3/6] Fixed keyword erasing audio

---
 hri/packages/speech/scripts/useful_audio.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/hri/packages/speech/scripts/useful_audio.py b/hri/packages/speech/scripts/useful_audio.py
index 06dafeb..27e2b65 100755
--- a/hri/packages/speech/scripts/useful_audio.py
+++ b/hri/packages/speech/scripts/useful_audio.py
@@ -75,7 +75,7 @@ def __init__(self):
 
         self.timer = None
         self.is_saying = False
-        self.audio_state = "None"
+        self.audio_state = "idle"
 
         self.publisher = self.create_publisher(AudioData, "UsefulAudio", 20)
         self.audio_state_publisher = self.create_publisher(String, "AudioState", 10)
@@ -221,9 +221,10 @@ def callback_saying(self, msg):
         self.compute_audio_state()
 
     def callback_keyword(self, msg):
-        self.triggered = True
-        self.discard_audio()
-        self.compute_audio_state()
+        if self.audio_state == "idle":
+            self.triggered = True
+            self.discard_audio()
+            self.compute_audio_state()
 
     def compute_audio_state(self):
         new_state = (

From 680d6c0c7b3e02bb78c1e5667d0e172f14808661 Mon Sep 17 00:00:00 2001
From: Gilmm27 <A00839075@tec.mx>
Date: Sun, 26 Jan 2025 21:43:45 -0800
Subject: [PATCH 4/6] added docker compose for stt

---
 hri/.gitignore                                |  1 +
 hri/README.md                                 |  2 +-
 hri/packages/speech/scripts/stt/Dockerfile    |  5 --
 hri/packages/speech/scripts/stt/README.md     | 16 +----
 hri/packages/speech/scripts/stt/Whisper.py    | 62 -------------------
 .../speech/scripts/stt/docker-compose.yaml    | 12 ++++
 .../speech/scripts/stt/requirements.txt       |  1 -
 7 files changed, 16 insertions(+), 83 deletions(-)
 delete mode 100755 hri/packages/speech/scripts/stt/Whisper.py
 create mode 100644 hri/packages/speech/scripts/stt/docker-compose.yaml

diff --git a/hri/.gitignore b/hri/.gitignore
index 079e92f..3132a86 100644
--- a/hri/.gitignore
+++ b/hri/.gitignore
@@ -6,3 +6,4 @@
 **/.vscode/
 **/__pychache__/
 **/models
+**.pyc
\ No newline at end of file
diff --git a/hri/README.md b/hri/README.md
index ab150fe..b6621dc 100644
--- a/hri/README.md
+++ b/hri/README.md
@@ -70,7 +70,7 @@ Most of the final commands will be executed using the docker compose file.
 However, some testing commands are the following:
 
 ```bash
-# Speech (Remember to start the whisper docker before)
+# Speech (Remember to start the stt docker before)
 ros2 launch speech devices_launch.py
 
 ros2 topic pub /speech/speak_now --once std_msgs/msg/String "data: 'Go to the kitchen and grab cookies'"
diff --git a/hri/packages/speech/scripts/stt/Dockerfile b/hri/packages/speech/scripts/stt/Dockerfile
index f7fdb0e..44c42d7 100644
--- a/hri/packages/speech/scripts/stt/Dockerfile
+++ b/hri/packages/speech/scripts/stt/Dockerfile
@@ -14,8 +14,3 @@ RUN apt-get update && apt-get install -y \
 RUN pip install --no-cache-dir -r requirements.txt
 
 RUN apt-get update && apt-get install -y ffmpeg
-
-# Expose the port for the gRPC server
-EXPOSE 50051
-
-CMD ["bash", "-c", "python -u $SCRIPT_NAME --port $PORT --model_size $MODEL_SIZE"]
diff --git a/hri/packages/speech/scripts/stt/README.md b/hri/packages/speech/scripts/stt/README.md
index 9a6c6ca..7a17035 100644
--- a/hri/packages/speech/scripts/stt/README.md
+++ b/hri/packages/speech/scripts/stt/README.md
@@ -24,23 +24,11 @@ python3 Whisper.py
 
 ## Running on CPU
 
-If testing or running on a PC/laptop, use the Dockerfile to run the scripts.
+If testing or running on a PC/laptop, use docker compose to run the scripts.
 
 ```bash
 # pwd -> /speech/scripts/stt
-docker build -t roborregos/home2:stt .
-
-# To run whisper for the first time:
-# pwd -> /speech/scripts/stt
-docker run -e SCRIPT_NAME=Whisper.py -e PORT=50051 -e MODEL_SIZE=base.en -p 50051:50051 --name whisper -v .:/app roborregos/home2:stt
-# In the future you can use:
-docker start -ai whisper
-
-# To run faster-whisper for the first time:
-# pwd -> /speech/scripts/stt
-docker run -e SCRIPT_NAME=Faster-whisper.py -e PORT=50051 -e MODEL_SIZE=base.en -p 50051:50051 --name faster-whisper -v .:/app roborregos/home2:stt
-# In the future you can use:
-docker start -ai faster-whisper
+docker compose up
 ```
 
 ## gRPC implementation
diff --git a/hri/packages/speech/scripts/stt/Whisper.py b/hri/packages/speech/scripts/stt/Whisper.py
deleted file mode 100755
index 182aec9..0000000
--- a/hri/packages/speech/scripts/stt/Whisper.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import grpc
-from concurrent import futures
-import speech_pb2
-import speech_pb2_grpc
-import whisper
-import torch
-import argparse
-from wav_utils import WavUtils
-import os
-
-
-class WhisperServicer(speech_pb2_grpc.SpeechServiceServicer):
-    def __init__(self, model_size):
-        self.model_size = model_size
-        self.audio_model = self.load_model()
-
-    def load_model(self):
-        model_directory = os.path.join(os.path.dirname(__file__), "models")
-        whisper._download(whisper._MODELS[self.model_size], model_directory, False)
-        return whisper.load_model(self.model_size, download_root=model_directory)
-
-    def Transcribe(self, request, context):
-        # Generate a temporary WAV file from received audio data
-        temp_file = WavUtils.generate_temp_wav(1, 2, 16000, request.audio_data)
-
-        # Perform transcription
-        result = self.audio_model.transcribe(temp_file, fp16=torch.cuda.is_available())
-        WavUtils.discard_wav(temp_file)
-
-        # Return the transcribed text
-        return speech_pb2.TextResponse(text=result["text"].strip())
-
-
-def serve(port, model_size):
-    # Create the gRPC server
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
-    speech_pb2_grpc.add_SpeechServiceServicer_to_server(
-        WhisperServicer(model_size), server
-    )
-
-    # Bind to a port
-    server.add_insecure_port(f"0.0.0.0:{port}")
-    print(
-        f"Whisper gRPC server is running on port {port} with model size {model_size}..."
-    )
-    server.start()
-    server.wait_for_termination()
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Whisper gRPC server")
-    parser.add_argument(
-        "--port", type=int, default=50051, help="Port to run the gRPC server on"
-    )
-    parser.add_argument(
-        "--model_size",
-        type=str,
-        default="base.en",
-        help="Model size to use (e.g., base.en, small.en, medium.en, large.en)",
-    )
-    args = parser.parse_args()
-    serve(args.port, args.model_size)
diff --git a/hri/packages/speech/scripts/stt/docker-compose.yaml b/hri/packages/speech/scripts/stt/docker-compose.yaml
new file mode 100644
index 0000000..355a1e2
--- /dev/null
+++ b/hri/packages/speech/scripts/stt/docker-compose.yaml
@@ -0,0 +1,12 @@
+services:
+  stt:
+    container_name: home2-hri-stt
+    image: roborregos/home2:hri-stt
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "50051:50051"
+    volumes:
+      - .:/app
+    command: ["bash", "-c", "python -u Faster-whisper.py --port 50051 --model_size base.en"]
diff --git a/hri/packages/speech/scripts/stt/requirements.txt b/hri/packages/speech/scripts/stt/requirements.txt
index f60b09d..9e9c6bd 100644
--- a/hri/packages/speech/scripts/stt/requirements.txt
+++ b/hri/packages/speech/scripts/stt/requirements.txt
@@ -2,7 +2,6 @@ grpcio
 grpcio-tools
 torch
 torchaudio
-whisper
 faster-whisper
 pydub
 soundfile

From 856f89200da1058e098c01f6af9e9ccc76dfcb48 Mon Sep 17 00:00:00 2001
From: Gilmm27 <A00839075@tec.mx>
Date: Mon, 27 Jan 2025 23:08:41 -0800
Subject: [PATCH 5/6] moved docker to correct directory

---
 .../Dockerfile => docker/hri/Dockerfile.stt   |  4 +-
 .../hri/stt.yaml                              |  7 +-
 .../speech/scripts/stt/Faster-whisper.py      |  7 +-
 hri/packages/speech/scripts/stt/README.md     |  4 +-
 hri/packages/speech/scripts/stt/wav_utils.py  | 71 -------------------
 5 files changed, 14 insertions(+), 79 deletions(-)
 rename hri/packages/speech/scripts/stt/Dockerfile => docker/hri/Dockerfile.stt (69%)
 rename hri/packages/speech/scripts/stt/docker-compose.yaml => docker/hri/stt.yaml (58%)
 delete mode 100644 hri/packages/speech/scripts/stt/wav_utils.py

diff --git a/hri/packages/speech/scripts/stt/Dockerfile b/docker/hri/Dockerfile.stt
similarity index 69%
rename from hri/packages/speech/scripts/stt/Dockerfile
rename to docker/hri/Dockerfile.stt
index 44c42d7..fb57156 100644
--- a/hri/packages/speech/scripts/stt/Dockerfile
+++ b/docker/hri/Dockerfile.stt
@@ -2,7 +2,7 @@ FROM python:3.9-slim
 
 WORKDIR /app
 
-COPY requirements.txt .
+COPY ../../hri/packages/speech/scripts/stt/requirements.txt .
 
 # Install dependencies
 RUN apt-get update && apt-get install -y \
@@ -13,4 +13,4 @@ RUN apt-get update && apt-get install -y \
 
 RUN pip install --no-cache-dir -r requirements.txt
 
-RUN apt-get update && apt-get install -y ffmpeg
+RUN apt-get update && apt-get install -y ffmpeg
\ No newline at end of file
diff --git a/hri/packages/speech/scripts/stt/docker-compose.yaml b/docker/hri/stt.yaml
similarity index 58%
rename from hri/packages/speech/scripts/stt/docker-compose.yaml
rename to docker/hri/stt.yaml
index 355a1e2..54dc135 100644
--- a/hri/packages/speech/scripts/stt/docker-compose.yaml
+++ b/docker/hri/stt.yaml
@@ -3,10 +3,11 @@ services:
     container_name: home2-hri-stt
     image: roborregos/home2:hri-stt
     build:
-      context: .
-      dockerfile: Dockerfile
+      context: ../..
+      dockerfile: docker/hri/Dockerfile.stt
     ports:
       - "50051:50051"
     volumes:
-      - .:/app
+      - ../../hri/packages/speech/scripts/stt/:/app
+      - ../../hri/packages/speech/speech/:/app/speech
     command: ["bash", "-c", "python -u Faster-whisper.py --port 50051 --model_size base.en"]
diff --git a/hri/packages/speech/scripts/stt/Faster-whisper.py b/hri/packages/speech/scripts/stt/Faster-whisper.py
index f999751..5499655 100644
--- a/hri/packages/speech/scripts/stt/Faster-whisper.py
+++ b/hri/packages/speech/scripts/stt/Faster-whisper.py
@@ -5,8 +5,13 @@
 from faster_whisper import WhisperModel
 import os
 import torch
-from wav_utils import WavUtils
 import argparse
+import sys
+
+# Add the directory containing the protos to the Python path
+sys.path.append(os.path.join(os.path.dirname(__file__), "speech"))
+
+from wav_utils import WavUtils
 
 
 class WhisperServicer(speech_pb2_grpc.SpeechServiceServicer):
diff --git a/hri/packages/speech/scripts/stt/README.md b/hri/packages/speech/scripts/stt/README.md
index 7a17035..f2bd7a3 100644
--- a/hri/packages/speech/scripts/stt/README.md
+++ b/hri/packages/speech/scripts/stt/README.md
@@ -27,8 +27,8 @@ python3 Whisper.py
 If testing or running on a PC/laptop, use docker compose to run the scripts.
 
 ```bash
-# pwd -> /speech/scripts/stt
-docker compose up
+# pwd -> /docker/hri
+docker compose -f stt.yaml up
 ```
 
 ## gRPC implementation
diff --git a/hri/packages/speech/scripts/stt/wav_utils.py b/hri/packages/speech/scripts/stt/wav_utils.py
deleted file mode 100644
index 4716f73..0000000
--- a/hri/packages/speech/scripts/stt/wav_utils.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import tempfile
-import wave
-import os
-from pydub import AudioSegment
-import soundfile as sf
-import sounddevice as sd
-
-
-class WavUtils:
-    @staticmethod
-    # Convert incoming data (AudioData) to wav file
-    def generate_temp_wav(n_channels, sample_width, sample_rate, data):
-        # data = bytes(data)
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            with wave.open(temp_file, "w") as wav_file:
-                wav_file.setnchannels(n_channels)
-                wav_file.setsampwidth(sample_width)
-                wav_file.setframerate(sample_rate)
-
-                wav_file.writeframes(data)
-            # Return the temporary file name
-            return temp_file.name
-
-    @staticmethod
-    def discard_wav(file_path):
-        if (
-            os.path.exists(file_path)
-            and os.path.isfile(file_path)
-            and file_path.endswith(".wav")
-        ):
-            os.remove(file_path)
-
-    @staticmethod
-    # Return if the audio is over the minumum threshold
-    def within_time_frame(file_path, min_time, max_time):
-        time = WavUtils.get_file_time(file_path)
-        if max_time > time and time > min_time:
-            return True
-        return False
-
-    @staticmethod
-    def get_file_time(file_path):
-        with wave.open(file_path, "r") as f:
-            frames = f.getnframes()
-            rate = f.getframerate()
-            duration = frames / float(rate * f.getnchannels())
-            return duration
-
-    @staticmethod
-    def play_wav(file_path, device_index):
-        # playsound(file_path,block=True)
-        # return
-        data, samplerate = sf.read(file_path)
-        sd.default.device = device_index
-        sd.play(data, samplerate)
-        sd.wait()
-
-    @staticmethod
-    def mp3_to_wav(mp3_path, wav_path, sample_rate=44100):
-        # Load the MP3 file
-        audio = AudioSegment.from_mp3(mp3_path)
-
-        # Export the audio to WAV format
-        audio.export(wav_path, format="wav", parameters=["-ar", str(sample_rate)])
-
-    @staticmethod
-    def play_mp3(mp3_path, device_index=11):
-        wav_name = os.path.splitext(mp3_path)[0] + "_temp" + ".wav"
-        WavUtils.mp3_to_wav(mp3_path, wav_name)
-        WavUtils.play_wav(wav_name, device_index)
-        WavUtils.discard_wav(wav_name)

From 388c041ac59a1f6bd503dc9b11898cf614a770e5 Mon Sep 17 00:00:00 2001
From: Gilmm27 <A00839075@tec.mx>
Date: Wed, 29 Jan 2025 17:57:14 -0800
Subject: [PATCH 6/6] added service name parameter

---
 hri/packages/speech/config/hear.yaml | 1 +
 hri/packages/speech/scripts/hear.py  | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/hri/packages/speech/config/hear.yaml b/hri/packages/speech/config/hear.yaml
index db0bafb..1708a6f 100644
--- a/hri/packages/speech/config/hear.yaml
+++ b/hri/packages/speech/config/hear.yaml
@@ -2,4 +2,5 @@ hear:
   ros__parameters:
     STT_SERVER_IP: "127.0.0.1:50051"
     START_SERVICE: True
+    STT_SERVICE_NAME: "stt_service"
     detection_publish_topic: "/keyword_detected"
\ No newline at end of file
diff --git a/hri/packages/speech/scripts/hear.py b/hri/packages/speech/scripts/hear.py
index 987b9e5..6056e43 100755
--- a/hri/packages/speech/scripts/hear.py
+++ b/hri/packages/speech/scripts/hear.py
@@ -49,6 +49,11 @@ def __init__(self):
             .get_parameter_value()
             .bool_value
         )
+        service_name = (
+            self.declare_parameter("STT_SERVICE_NAME", "stt_service")
+            .get_parameter_value()
+            .string_value
+        )
 
         # Initialize the Whisper gRPC client
         self.client = WhisperClient(server_ip)
@@ -85,7 +90,7 @@ def __init__(self):
             )
             self.stt_service = self.create_service(
                 STT,
-                "stt_service",
+                service_name,
                 self.stt_service_callback,
                 callback_group=service_group,
             )