Skip to content

Commit

Permalink
Merge pull request #16 from RoBorregos/hri
Browse files Browse the repository at this point in the history
refactor: ros params for speech nodes
  • Loading branch information
Oscar-gg authored Jan 4, 2025
2 parents 992239d + e99ae7d commit b2d9711
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 74 deletions.
5 changes: 2 additions & 3 deletions hri/packages/speech/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ find_package(rclcpp REQUIRED)
find_package(rclpy REQUIRED)
find_package(frida_interfaces REQUIRED)

# IF THERE ISNT A LAUNCH DIRECTORY DONT ADD THIS
#install(DIRECTORY launch
# DESTINATION share/${PROJECT_NAME})
install(DIRECTORY launch config
DESTINATION share/${PROJECT_NAME})

# Add Cpp executable
# add_executable(ignore_laser src/ignore_laser.cpp)
Expand Down
7 changes: 7 additions & 0 deletions hri/packages/speech/config/microphone.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
audio_capturer:
ros__parameters:
MIC_DEVICE_NAME: "default"
MIC_INPUT_CHANNELS: 32
MIC_OUT_CHANNELS: 32
USE_RESPEAKER: False
publish_topic: "/rawAudioChunk"
12 changes: 12 additions & 0 deletions hri/packages/speech/config/speaker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
say:
ros__parameters:
SPEAKER_DEVICE_NAME: "default"
SPEAKER_INPUT_CHANNELS: 32
SPEAKER_OUT_CHANNELS: 32

speak_service: "/speech/speak"
speak_topic: "/speech/speak_now"
speaking_topic: "saying"

model: "en_US-amy-medium"
offline: True
36 changes: 36 additions & 0 deletions hri/packages/speech/launch/devices_launch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os

from ament_index_python.packages import get_package_share_directory
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
mic_config = os.path.join(
get_package_share_directory("speech"), "config", "microphone.yaml"
)

speaker_config = os.path.join(
get_package_share_directory("speech"), "config", "speaker.yaml"
)

return LaunchDescription(
[
Node(
package="speech",
executable="audio_capturer.py",
name="audio_capturer",
output="screen",
emulate_tty=True,
parameters=[mic_config],
),
Node(
package="speech",
executable="say.py",
name="say",
output="screen",
emulate_tty=True,
parameters=[speaker_config],
),
]
)
65 changes: 42 additions & 23 deletions hri/packages/speech/scripts/audio_capturer.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,53 @@
#!/usr/bin/env python3

import numpy as np
import pyaudio
import rclpy
from rclpy.executors import ExternalShutdownException
from rclpy.node import Node
from speech.speech_api_utils import SpeechApiUtils

from frida_interfaces.msg import AudioData

import pyaudio
import os
import numpy as np

from speech.speech_api_utils import SpeechApiUtils
class AudioCapturer(Node):
def __init__(self):
super().__init__("audio_capturer")

self.declare_parameter("publish_topic", "/rawAudioChunk")
self.declare_parameter("MIC_DEVICE_NAME", "default")
self.declare_parameter("MIC_INPUT_CHANNELS", 32)
self.declare_parameter("MIC_OUT_CHANNELS", 32)
self.declare_parameter("USE_RESPEAKER", False)

USE_RESPEAKER = False
publish_topic = (
self.get_parameter("publish_topic").get_parameter_value().string_value
)

# Get device index using environment variables
MIC_DEVICE_NAME = os.getenv("MIC_DEVICE_NAME", default=None)
MIC_INPUT_CHANNELS = int(os.getenv("MIC_INPUT_CHANNELS", default=2))
MIC_OUT_CHANNELS = int(os.getenv("MIC_OUT_CHANNELS", default=0))
self.use_respeaker = (
self.get_parameter("USE_RESPEAKER").get_parameter_value().bool_value
)

INPUT_DEVICE_INDEX = SpeechApiUtils.getIndexByNameAndChannels(
MIC_DEVICE_NAME, MIC_INPUT_CHANNELS, MIC_OUT_CHANNELS
)
mic_device_name = (
self.get_parameter("MIC_DEVICE_NAME").get_parameter_value().string_value
)
mic_input_channels = (
self.get_parameter("MIC_INPUT_CHANNELS").get_parameter_value().integer_value
)
mic_out_channels = (
self.get_parameter("MIC_OUT_CHANNELS").get_parameter_value().integer_value
)

if INPUT_DEVICE_INDEX is None:
print("Warning: input device index not found, using system default.")
self.publisher_ = self.create_publisher(AudioData, publish_topic, 20)
self.input_device_index = SpeechApiUtils.getIndexByNameAndChannels(
mic_device_name, mic_input_channels, mic_out_channels
)

if self.input_device_index is None:
self.get_logger().warn(
"Input device index not found, using system default."
)

class AudioCapturer(Node):
def __init__(self):
super().__init__("audio_capturer")
self.publisher_ = self.create_publisher(AudioData, "rawAudioChunk", 20)
self.get_logger().info("AudioCapturer node initialized.")

def record(self):
Expand All @@ -39,13 +56,13 @@ def record(self):
# Format for the recorded audio, constants set from the Porcupine demo.py
CHUNK_SIZE = 512
FORMAT = pyaudio.paInt16 # Signed 2 bytes.
CHANNELS = 1 if not USE_RESPEAKER else 6
RATE = 16000 if USE_RESPEAKER else 41000
CHANNELS = 6 if self.use_respeaker else 1
RATE = 16000 if self.use_respeaker else 41000
EXTRACT_CHANNEL = 0 # Use channel 0. Tested with TestMic.py. See channel meaning: https://wiki.seeedstudio.com/ReSpeaker-USB-Mic-Array/#update-firmware

p = pyaudio.PyAudio()
stream = p.open(
input_device_index=INPUT_DEVICE_INDEX, # See list_audio_devices() or set it to None for default
input_device_index=self.input_device_index, # See list_audio_devices() or set it to None for default
format=FORMAT,
channels=CHANNELS,
rate=RATE,
Expand All @@ -56,13 +73,15 @@ def record(self):
while stream.is_active() and rclpy.ok():
try:
in_data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
if USE_RESPEAKER:
if self.use_respeaker:
in_data = np.frombuffer(in_data, dtype=np.int16)[EXTRACT_CHANNEL::6]
in_data = in_data.tobytes()
msg = in_data
self.publisher_.publish(AudioData(data=msg))
except IOError as e:
print("I/O error({0}): {1}".format(e.errno, e.strerror))
self.get_logger().error(
"I/O error({0}): {1}".format(e.errno, e.strerror)
)

if not stream.is_active():
self.get_logger().error("Audio stream is not active.")
Expand Down
109 changes: 63 additions & 46 deletions hri/packages/speech/scripts/say.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,77 @@
#!/usr/bin/env python3

import rclpy
from rclpy.executors import ExternalShutdownException
from rclpy.node import Node
from std_msgs.msg import String, Bool
from frida_interfaces.srv import Speak

import os
from gtts import gTTS
from pygame import mixer
import subprocess

import rclpy
from gtts import gTTS
from pygame import mixer
from rclpy.executors import ExternalShutdownException
from rclpy.node import Node
from speech.speech_api_utils import SpeechApiUtils
from speech.wav_utils import WavUtils
from std_msgs.msg import Bool, String

from frida_interfaces.srv import Speak

SPEAK_SERVICE_TOPIC = "/speech/speak"
SPEAK_NOW_TOPIC = "/speech/speak_now"

# Offline voice
MODEL = "en_US-amy-medium"
CURRENT_FILE_PATH = os.path.abspath(__file__)
VOICE_DIRECTORY = os.path.join(os.path.dirname(CURRENT_FILE_PATH), "offline_voice")

CURRENT_DIRECTORY = os.path.dirname(CURRENT_FILE_PATH)
VOICE_DIRECTORY = os.path.join(CURRENT_DIRECTORY, "offline_voice")

class Say(Node):
def __init__(self):
super().__init__("say")
self.get_logger().info("Say node has started.")

# Get device index using environment variables
SPEAKER_DEVICE_NAME = os.getenv("SPEAKER_DEVICE_NAME", default=None)
SPEAKER_INPUT_CHANNELS = int(os.getenv("SPEAKER_INPUT_CHANNELS", default=2))
SPEAKER_OUT_CHANNELS = int(os.getenv("SPEAKER_OUT_CHANNELS", default=0))
self.connected = False
self.declare_parameter("speaking_topic", "/saying")

OUTPUT_DEVICE_INDEX = SpeechApiUtils.getIndexByNameAndChannels(
SPEAKER_DEVICE_NAME, SPEAKER_INPUT_CHANNELS, SPEAKER_OUT_CHANNELS
)
self.declare_parameter("speak_service", "/speech/speak")
self.declare_parameter("speak_topic", "/speech/speak_now")
self.declare_parameter("model", "en_US-amy-medium")
self.declare_parameter("offline", True)

if OUTPUT_DEVICE_INDEX is None:
print("Warning: output device index not found, using system default.")
self.declare_parameter("SPEAKER_DEVICE_NAME", "default")
self.declare_parameter("SPEAKER_INPUT_CHANNELS", 32)
self.declare_parameter("SPEAKER_OUT_CHANNELS", 32)

DEBUG = True
OFFLINE = True
speaker_device_name = (
self.get_parameter("SPEAKER_DEVICE_NAME").get_parameter_value().string_value
)
speaker_input_channels = (
self.get_parameter("SPEAKER_INPUT_CHANNELS")
.get_parameter_value()
.integer_value
)
speaker_out_channels = (
self.get_parameter("SPEAKER_OUT_CHANNELS")
.get_parameter_value()
.integer_value
)

self.output_device_index = SpeechApiUtils.getIndexByNameAndChannels(
speaker_device_name, speaker_input_channels, speaker_out_channels
)

class Say(Node):
def __init__(self):
super().__init__("say")
self.publisher_ = self.create_publisher(Bool, "saying", 10)
self.get_logger().info("Say node has started.")
speak_service = (
self.get_parameter("speak_service").get_parameter_value().string_value
)
speak_topic = (
self.get_parameter("speak_topic").get_parameter_value().string_value
)
speaking_topic = (
self.get_parameter("speaking_topic").get_parameter_value().string_value
)

self.connected = False
if not OFFLINE:
self.connected = SpeechApiUtils.is_connected()
self.model = self.get_parameter("model").get_parameter_value().string_value
self.offline = self.get_parameter("offline").get_parameter_value().bool_value

self.create_service(Speak, SPEAK_SERVICE_TOPIC, self.speak_service)
if not self.offline:
self.connected = SpeechApiUtils.is_connected()

self.create_subscription(String, SPEAK_NOW_TOPIC, self.speak_topic, 10)
self.create_service(Speak, speak_service, self.speak_service)
self.create_subscription(String, speak_topic, self.speak_topic, 10)
self.publisher_ = self.create_publisher(Bool, speaking_topic, 10)

def speak_service(self, req):
"""When say is called as a service. Caller awaits for the response."""
Expand All @@ -70,15 +87,15 @@ def say(self, text):
self.publisher_.publish(Bool(data=True))
success = False
try:
if OFFLINE or not self.connected:
if self.offline or not self.connected:
self.offline_voice(text)
else:
self.connectedVoice(text)
success = True
except Exception as e:
print(e)
if not OFFLINE:
print("Retrying with offline mode")
self.get_logger().error(e)
if not self.offline:
self.get_logger().warn("Retrying with offline mode")
self.offline_voice(text)

self.publisher_.publish(Bool(data=False))
Expand All @@ -94,12 +111,12 @@ def offline_voice(self, text):
output_path = os.path.join(VOICE_DIRECTORY, f"{counter}.wav")
self.synthesize_voice_offline(output_path, chunk)

print(f"Generated {counter} wav files.")
self.get_logger().debug(f"Generated {counter} wav files.")

# Play and remove all mp3 files
for i in range(1, counter + 1):
save_path = os.path.join(VOICE_DIRECTORY, f"{i}.wav")
# WavUtils.play_wav(save_path, device_index=OUTPUT_DEVICE_INDEX)
# WavUtils.play_wav(save_path, device_index=self.output_device_index)
self.play_audio(save_path)
WavUtils.discard_wav(save_path)

Expand All @@ -108,8 +125,8 @@ def connectedVoice(self, text):
save_path = "play.mp3"
tts.save(save_path)
self.get_logger().info("Saying...")
# WavUtils.play_mp3(save_path, device_index=OUTPUT_DEVICE_INDEX)
# .play_mp3(save_path, device_index=OUTPUT_DEVICE_INDEX)
# WavUtils.play_mp3(save_path, device_index=self.output_device_index)
# .play_mp3(save_path, device_index=self.output_device_index)
self.play_audio(save_path)
self.get_logger().info("Finished speaking.")

Expand Down Expand Up @@ -142,7 +159,7 @@ def synthesize_voice_offline(self, output_path, text):
else "python3.10 -m piper"
)

model_path = os.path.join(VOICE_DIRECTORY, MODEL + ".onnx")
model_path = os.path.join(VOICE_DIRECTORY, self.model + ".onnx")

download_model = not os.path.exists(model_path)

Expand All @@ -155,7 +172,7 @@ def synthesize_voice_offline(self, output_path, text):
"|",
executable,
"--model",
MODEL if download_model else model_path,
self.model if download_model else model_path,
"--data-dir",
VOICE_DIRECTORY,
"--download-dir",
Expand Down
5 changes: 3 additions & 2 deletions hri/packages/speech/speech/speech_api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ def getIndexByNameAndChannels(name, in_channels=1, out_channels=0):
num_dev = 0
for device_info in devices:
# print(f"Device {num_dev}: [{device_info['name']}], [{device_info['max_input_channels']}] input channels, [{device_info['max_output_channels']}] output channels")
if name in device_info["name"] or (
device_info["max_input_channels"] == in_channels
if (
name in device_info["name"]
and device_info["max_input_channels"] == in_channels
and device_info["max_output_channels"] == out_channels
):
return num_dev
Expand Down

0 comments on commit b2d9711

Please sign in to comment.