From 450a6c83b16cf54d6a7fcd90f28d31acd61694d9 Mon Sep 17 00:00:00 2001
From: Danilo Pejovic <115164734+danilo-pejovic@users.noreply.github.com>
Date: Thu, 7 Mar 2024 14:30:31 +0100
Subject: [PATCH] Mic recording (#74)

---
 Dockerfile                                    |  2 +-
 .../include/rae_hw/peripherals/speakers.hpp   |  2 +-
 rae_hw/src/peripherals/speakers.cpp           |  7 ++---
 rae_msgs/srv/PlayAudio.srv                    |  1 +
 rae_sdk/rae_sdk/robot/audio.py                | 31 +++++++++++++++++--
 5 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f0368ef..20bcb6a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,7 @@ RUN apt-get update && apt-get -y install --no-install-recommends \
     libsndfile1-dev \
     libsndfile1
 
-RUN pip3 install openai
+RUN pip3 install openai ffmpeg-python
 
 ENV WS=/ws
 RUN mkdir -p $WS/src
diff --git a/rae_hw/include/rae_hw/peripherals/speakers.hpp b/rae_hw/include/rae_hw/peripherals/speakers.hpp
index 8ee4314..6a26670 100644
--- a/rae_hw/include/rae_hw/peripherals/speakers.hpp
+++ b/rae_hw/include/rae_hw/peripherals/speakers.hpp
@@ -30,7 +30,7 @@ class SpeakersNode : public rclcpp_lifecycle::LifecycleNode {
 
    private:
     void play_mp3(const char*);
-    void play_wav(const char*);
+    void play_wav(const char*, const float);
     rclcpp::Service<rae_msgs::srv::PlayAudio>::SharedPtr play_audio_service_;
 
     void play_audio_service_callback(const std::shared_ptr<rae_msgs::srv::PlayAudio::Request> request,
diff --git a/rae_hw/src/peripherals/speakers.cpp b/rae_hw/src/peripherals/speakers.cpp
index 09ca7fb..7d92500 100644
--- a/rae_hw/src/peripherals/speakers.cpp
+++ b/rae_hw/src/peripherals/speakers.cpp
@@ -47,11 +47,12 @@ CallbackReturn SpeakersNode::on_shutdown(const rclcpp_lifecycle::State& /*previo
 void SpeakersNode::play_audio_service_callback(const std::shared_ptr<rae_msgs::srv::PlayAudio::Request> request,
                                                const std::shared_ptr<rae_msgs::srv::PlayAudio::Response> response) {
     const std::string& file_location = request->file_location;
+    const float gain = request->gain;
 
     // Check if the file ends with ".wav"
     if(file_location.size() >= 4 && file_location.substr(file_location.size() - 4) == ".wav") {
         // Call the play_wav function
-        play_wav(file_location.c_str());
+        play_wav(file_location.c_str(), gain);
         response->success = true;
         return;
     }
@@ -110,7 +111,7 @@ void SpeakersNode::play_mp3(const char* mp3_file) {
     return;
 }
 
-void SpeakersNode::play_wav(const char* wav_file) {
+void SpeakersNode::play_wav(const char* wav_file, const float gain) {
     // Open WAV file
     SF_INFO sfinfo;
     SNDFILE* file = sf_open(wav_file, SFM_READ, &sfinfo);
@@ -133,8 +134,6 @@ void SpeakersNode::play_wav(const char* wav_file) {
     int32_t* buffer_wav = new int32_t[BUFFER_SIZE * sfinfo.channels];  // Use int32_t for 32-bit format
     sf_count_t readCount;
 
-    const float gain = 64.0f;  // Adjust this factor for desired gain
-
     while((readCount = sf_readf_int(file, buffer_wav, BUFFER_SIZE)) > 0) {
         // Apply gain to the samples
         for(int i = 0; i < readCount * sfinfo.channels; ++i) {
diff --git a/rae_msgs/srv/PlayAudio.srv b/rae_msgs/srv/PlayAudio.srv
index d5f7da2..ca2bef7 100644
--- a/rae_msgs/srv/PlayAudio.srv
+++ b/rae_msgs/srv/PlayAudio.srv
@@ -1,3 +1,4 @@
 string file_location
+float32 gain 1.0
 ---
 bool success
diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py
index e31dbcd..86c2194 100644
--- a/rae_sdk/rae_sdk/robot/audio.py
+++ b/rae_sdk/rae_sdk/robot/audio.py
@@ -2,7 +2,8 @@
 import random
 import logging as log
 from ament_index_python import get_package_share_directory
-
+import base64
+import ffmpeg
 from rae_msgs.srv import PlayAudio
 
 
@@ -33,11 +34,37 @@ def __init__(self, ros_interface):
         log.info("Audio Controller ready")
 
 
-    def play_audio_file(self, audio_file_path):
+    def play_audio_file(self, audio_file_path, gain = 1.0):
         req = PlayAudio.Request()
         req.file_location = audio_file_path
+        req.gain = gain
         res = self._ros_interface.call_async_srv('/play_audio', req)
         return res
+    
+    def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"):
+        """
+        Decode Base64-encoded WebM audio, convert it to WAV with ffmpeg, and save it to a file.
+        
+        Parameters
+        ----------
+            audio_data (str): Base64-encoded audio data in a WebM container.
+            output_file (str, optional): Path to save the WAV file. Defaults to "/app/mic_recording.wav".
+
+            
+        """
+        # Decode Base64 data
+        binary_data = base64.b64decode(audio_data)
+    
+        # Convert WebM to WAV using ffmpeg
+        output, _ = (
+            ffmpeg.input('pipe:', format='webm')
+            .output('pipe:', format='wav')
+            .run(input=binary_data, capture_stdout=True, capture_stderr=True)
+        )
+    
+        # Write the output to the specified WAV file
+        with open(output_file, 'wb') as wave_file:
+            wave_file.write(output)
 
     def honk(self):
         horn_path = os.path.join(self._assets_path, 'sfx', 'horn.mp3')