From 450a6c83b16cf54d6a7fcd90f28d31acd61694d9 Mon Sep 17 00:00:00 2001 From: Danilo Pejovic <115164734+danilo-pejovic@users.noreply.github.com> Date: Thu, 7 Mar 2024 14:30:31 +0100 Subject: [PATCH] Mic recording (#74) --- Dockerfile | 2 +- .../include/rae_hw/peripherals/speakers.hpp | 2 +- rae_hw/src/peripherals/speakers.cpp | 7 ++--- rae_msgs/srv/PlayAudio.srv | 1 + rae_sdk/rae_sdk/robot/audio.py | 31 +++++++++++++++++-- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index f0368ef..20bcb6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ RUN apt-get update && apt-get -y install --no-install-recommends \ libsndfile1-dev \ libsndfile1 -RUN pip3 install openai +RUN pip3 install openai ffmpeg-python ENV WS=/ws RUN mkdir -p $WS/src diff --git a/rae_hw/include/rae_hw/peripherals/speakers.hpp b/rae_hw/include/rae_hw/peripherals/speakers.hpp index 8ee4314..6a26670 100644 --- a/rae_hw/include/rae_hw/peripherals/speakers.hpp +++ b/rae_hw/include/rae_hw/peripherals/speakers.hpp @@ -30,7 +30,7 @@ class SpeakersNode : public rclcpp_lifecycle::LifecycleNode { private: void play_mp3(const char*); - void play_wav(const char*); + void play_wav(const char*, const float); rclcpp::Service::SharedPtr play_audio_service_; void play_audio_service_callback(const std::shared_ptr request, diff --git a/rae_hw/src/peripherals/speakers.cpp b/rae_hw/src/peripherals/speakers.cpp index 09ca7fb..7d92500 100644 --- a/rae_hw/src/peripherals/speakers.cpp +++ b/rae_hw/src/peripherals/speakers.cpp @@ -47,11 +47,12 @@ CallbackReturn SpeakersNode::on_shutdown(const rclcpp_lifecycle::State& /*previo void SpeakersNode::play_audio_service_callback(const std::shared_ptr request, const std::shared_ptr response) { const std::string& file_location = request->file_location; + const float gain = request->gain; // Check if the file ends with ".wav" if(file_location.size() >= 4 && file_location.substr(file_location.size() - 4) == ".wav") { // Call the play_wav 
function - play_wav(file_location.c_str()); + play_wav(file_location.c_str(), gain); response->success = true; return; } @@ -110,7 +111,7 @@ void SpeakersNode::play_mp3(const char* mp3_file) { return; } -void SpeakersNode::play_wav(const char* wav_file) { +void SpeakersNode::play_wav(const char* wav_file, const float gain) { // Open WAV file SF_INFO sfinfo; SNDFILE* file = sf_open(wav_file, SFM_READ, &sfinfo); @@ -133,8 +134,6 @@ void SpeakersNode::play_wav(const char* wav_file) { int32_t* buffer_wav = new int32_t[BUFFER_SIZE * sfinfo.channels]; // Use int32_t for 32-bit format sf_count_t readCount; - const float gain = 64.0f; // Adjust this factor for desired gain - while((readCount = sf_readf_int(file, buffer_wav, BUFFER_SIZE)) > 0) { // Apply gain to the samples for(int i = 0; i < readCount * sfinfo.channels; ++i) { diff --git a/rae_msgs/srv/PlayAudio.srv b/rae_msgs/srv/PlayAudio.srv index d5f7da2..ca2bef7 100644 --- a/rae_msgs/srv/PlayAudio.srv +++ b/rae_msgs/srv/PlayAudio.srv @@ -1,3 +1,4 @@ string file_location +float32 gain 1.0 --- bool success diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py index e31dbcd..86c2194 100644 --- a/rae_sdk/rae_sdk/robot/audio.py +++ b/rae_sdk/rae_sdk/robot/audio.py @@ -2,7 +2,8 @@ import random import logging as log from ament_index_python import get_package_share_directory - +import base64 +import ffmpeg from rae_msgs.srv import PlayAudio @@ -33,11 +34,37 @@ def __init__(self, ros_interface): log.info("Audio Controller ready") - def play_audio_file(self, audio_file_path): + def play_audio_file(self, audio_file_path, gain = 1.0): req = PlayAudio.Request() req.file_location = audio_file_path + req.gain = gain res = self._ros_interface.call_async_srv('/play_audio', req) return res + + def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): + """ + Decode the Base64 audio data and save it as a WAV file. + + Attributes + ---------- + audio_data (str): Base64 encoded audio data. 
+ output_file (str, optional): Path to save the WAV file. Defaults to "/app/mic_recording.wav". + + + """ + # Decode Base64 data + binary_data = base64.b64decode(audio_data) + + # Convert WebM to WAV using ffmpeg + output, _ = ( + ffmpeg.input('pipe:', format='webm') + .output('pipe:', format='wav') + .run(input=binary_data, capture_stdout=True, capture_stderr=True) + ) + + # Write the output to the specified WAV file + with open(output_file, 'wb') as wave_file: + wave_file.write(output) def honk(self): horn_path = os.path.join(self._assets_path, 'sfx', 'horn.mp3')