Commit: Merge branch 'master' into mask-rcnn-fpn
Showing 136 changed files with 2,417 additions and 272 deletions.
audio_to_spectrogram/CMakeLists.txt
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 2.8.3)
project(audio_to_spectrogram)

find_package(catkin)
catkin_package()

install(DIRECTORY launch sample scripts test
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
)

if(CATKIN_ENABLE_TESTING)
  find_package(catkin REQUIRED COMPONENTS rostest roslaunch)
  add_rostest(test/audio_to_spectrogram.test)
  roslaunch_add_file_check(launch/audio_to_spectrogram.launch)
endif()
audio_to_spectrogram/README.md
@@ -0,0 +1,86 @@
# audio_to_spectrogram

This package converts audio data to spectrum and spectrogram data.

# Usage
With the following command, you can publish audio, spectrum, and spectrogram topics. Please set the args correctly for your microphone configuration, such as mic\_sampling\_rate or bitdepth.

```bash
roslaunch audio_to_spectrogram audio_to_spectrogram.launch
```

Here is an example using a rosbag with 300 Hz audio.
```bash
roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
```

|Spectrum|Spectrogram|
|---|---|
|![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)|

# Scripts

## audio_to_spectrum.py
A script to convert audio to spectrum.

- ### Publishing topics
  - `~spectrum` (`jsk_recognition_msgs/Spectrum`)

    Spectrum data calculated from audio by FFT.

- ### Subscribing topics
  - `~audio` (`audio_common_msgs/AudioData`)

    Audio stream data from a microphone. The audio format must be `wave`.

- ### Parameters
  - `~mic_sampling_rate` (`Int`, default: `16000`)

    Sampling rate [Hz] of the microphone, i.e., the sampling rate of the audio topic.

  - `~fft_sampling_period` (`Double`, default: `0.3`)

    Period [s] to sample audio data for one FFT.

  - `~bitdepth` (`Int`, default: `16`)

    Number of bits per audio sample.

  - `~high_cut_freq` (`Int`, default: `800`)

    Threshold to limit the maximum frequency of the output spectrum.

  - `~low_cut_freq` (`Int`, default: `1`)

    Threshold to limit the minimum frequency of the output spectrum.

## spectrum_to_spectrogram.py
A script to convert spectrum to spectrogram.

- ### Publishing topics
  - `~spectrogram` (`sensor_msgs/Image`)

    Spectrogram data, i.e., a time-series concatenation of spectra. The image format is 32FC1.

- ### Subscribing topics
  - `~spectrum` (`jsk_recognition_msgs/Spectrum`)

    Spectrum data calculated from audio by FFT.

- ### Parameters
  - `~image_height` (`Int`, default: `300`)

    Number of vertical (frequency axis) pixels in the output spectrogram.

  - `~image_width` (`Int`, default: `300`)

    Number of horizontal (time axis) pixels in the output spectrogram.

  - `~spectrogram_period` (`Double`, default: `5`)

    Period [s] of spectrum data accumulated to create one spectrogram.

  - `~publish_rate` (`Double`, default: `image_width / spectrogram_period`)

    Publish rate [Hz] of the spectrogram topic.
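As an illustration of the topics documented above, here is a minimal consumer sketch (not part of this commit). It subscribes to the filtered spectrum on `/audio_to_spectrum/spectrum_filtered`, the topic name wired in the launch file below; the node name `spectrum_peak_printer` is just a placeholder.

```python
#!/usr/bin/env python
# Sketch only: report the dominant frequency bin of each incoming spectrum.
import numpy as np
import rospy
from jsk_recognition_msgs.msg import Spectrum


def callback(msg):
    amplitude = np.array(msg.amplitude)
    frequency = np.array(msg.frequency)
    peak = np.argmax(amplitude)  # index of the strongest bin
    rospy.loginfo('peak: %.1f Hz (log amplitude %.2f)',
                  frequency[peak], amplitude[peak])


if __name__ == '__main__':
    rospy.init_node('spectrum_peak_printer')
    # Topic name as remapped in audio_to_spectrogram.launch.
    rospy.Subscriber('/audio_to_spectrum/spectrum_filtered', Spectrum,
                     callback, queue_size=1)
    rospy.spin()
```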
audio_to_spectrogram/launch/audio_to_spectrogram.launch
@@ -0,0 +1,71 @@
<!-- data flow -->
<!-- audio -> spectrum -> spectrogram -->

<launch>
  <arg name="launch_audio_capture" default="true" />

  <arg name="bitdepth" default="16" />
  <arg name="mic_sampling_rate" default="16000" />
  <arg name="device" default="hw:0,0" />
  <arg name="audio_topic" default="/audio" />
  <arg name="high_cut_freq" default="800" />
  <arg name="low_cut_freq" default="1" />
  <arg name="spectrogram_period" default="5" />
  <arg name="gui" default="true" />

  <!-- Publish audio topic from microphone -->
  <node name="audio_capture" pkg="audio_capture" type="audio_capture"
        if="$(arg launch_audio_capture)"
        respawn="true">
    <rosparam subst_value="true">
      format: wave
      channels: 1
      depth: $(arg bitdepth)
      sample_rate: $(arg mic_sampling_rate)
      device: $(arg device)
    </rosparam>
  </node>

  <!-- Convert audio topic to spectrum topic -->
  <node pkg="audio_to_spectrogram" type="audio_to_spectrum.py" name="audio_to_spectrum" respawn="true">
    <remap from="~audio" to="$(arg audio_topic)" />
    <rosparam subst_value="true">
      mic_sampling_rate: $(arg mic_sampling_rate)
      fft_sampling_period: 0.3
      bitdepth: $(arg bitdepth)
      high_cut_freq: $(arg high_cut_freq)
      low_cut_freq: $(arg low_cut_freq)
      fft_exec_rate: 50
    </rosparam>
  </node>

  <!-- Convert spectrum topic to spectrogram topic -->
  <node pkg="audio_to_spectrogram" type="spectrum_to_spectrogram.py" name="spectrum_to_spectrogram" respawn="true">
    <remap from="~spectrum" to="/audio_to_spectrum/spectrum_filtered" />
    <rosparam subst_value="true">
      image_height: 300
      image_width: 300
      spectrogram_period: $(arg spectrogram_period)
    </rosparam>
  </node>

  <group if="$(arg gui)">
    <!-- Visualize spectrum -->
    <node pkg="audio_to_spectrogram" type="spectrum_plot.py" name="spectrum_plot" >
      <remap from="~spectrum" to="/audio_to_spectrum/spectrum_filtered" />
    </node>

    <!-- Visualize spectrogram -->
    <!-- You can get the spectrogram image topic from /spectrogram_view/output -->
    <node pkg="image_view" type="image_view" name="spectrogram_view" >
      <remap from="image" to="/spectrum_to_spectrogram/spectrogram" />
      <rosparam>
        <!-- set all pixels between 0 and 255 -->
        do_dynamic_scaling: true
        <!-- use jet colormap -->
        colormap: 2
      </rosparam>
    </node>
  </group>

</launch>
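The launch file above publishes the spectrogram as a `sensor_msgs/Image` in 32FC1. A minimal sketch (not part of this commit) for consuming it in Python with `cv_bridge`, a declared run dependency, could look like the following; the topic name comes from the remap above, and the node name is a placeholder.

```python
#!/usr/bin/env python
# Sketch only: convert the 32FC1 spectrogram image into a numpy array.
import rospy
from cv_bridge import CvBridge
from sensor_msgs.msg import Image

bridge = CvBridge()


def callback(msg):
    # Passthrough keeps the float32 encoding; shape is (image_height, image_width).
    spectrogram = bridge.imgmsg_to_cv2(msg)
    rospy.loginfo('spectrogram %s, max %.2f', spectrogram.shape, spectrogram.max())


if __name__ == '__main__':
    rospy.init_node('spectrogram_listener')
    rospy.Subscriber('/spectrum_to_spectrogram/spectrogram', Image,
                     callback, queue_size=1)
    rospy.spin()
```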
audio_to_spectrogram/package.xml
@@ -0,0 +1,23 @@
<package>
  <name>audio_to_spectrogram</name>
  <version>0.0.1</version>
  <description>Convert audio (audio_common_msgs/AudioData) to spectrogram (sensor_msgs/Image)</description>

  <maintainer email="[email protected]">Naoya Yamaguchi</maintainer>

  <license>BSD</license>

  <buildtool_depend>catkin</buildtool_depend>

  <run_depend>audio_capture</run_depend>
  <run_depend>audio_common_msgs</run_depend>
  <run_depend>cv_bridge</run_depend>
  <run_depend>image_view</run_depend>
  <run_depend>jsk_recognition_msgs</run_depend>
  <run_depend>python-matplotlib</run_depend>
  <run_depend>sensor_msgs</run_depend>

  <test_depend>roslaunch</test_depend>
  <test_depend>rostest</test_depend>

</package>
audio_to_spectrogram/sample/data/audio_300hz.bag: Binary file not shown.

audio_to_spectrogram/sample/sample_audio_to_spectrogram.launch (19 additions, 0 deletions)
@@ -0,0 +1,19 @@
<!-- data flow -->
<!-- audio -> spectrum -> spectrogram -->

<launch>
  <arg name="gui" default="true" />

  <!-- play rosbag -->
  <arg name="filename" default="$(find audio_to_spectrogram)/sample/data/audio_300hz.bag" />
  <param name="use_sim_time" value="true" />
  <node name="rosbag_play"
        pkg="rosbag" type="play"
        args="$(arg filename) --clock --loop"/>

  <include file="$(find audio_to_spectrogram)/launch/audio_to_spectrogram.launch" >
    <arg name="launch_audio_capture" value="false" />
    <arg name="gui" value="$(arg gui)" />
  </include>

</launch>
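One way to confirm that the sample pipeline is running is to wait for a message on each stage. The sketch below is not part of this commit; the topic names are taken from the launch files above.

```python
#!/usr/bin/env python
# Sketch only: check that each stage of the audio -> spectrum -> spectrogram
# pipeline is publishing after the sample launch is started.
import rospy
from audio_common_msgs.msg import AudioData
from jsk_recognition_msgs.msg import Spectrum
from sensor_msgs.msg import Image

if __name__ == '__main__':
    rospy.init_node('pipeline_check')
    checks = [('/audio', AudioData),
              ('/audio_to_spectrum/spectrum_filtered', Spectrum),
              ('/spectrum_to_spectrogram/spectrogram', Image)]
    for topic, msg_type in checks:
        rospy.wait_for_message(topic, msg_type, timeout=10.0)
        rospy.loginfo('%s: OK', topic)
```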
audio_to_spectrogram/scripts/audio_to_spectrum.py
@@ -0,0 +1,91 @@
#!/usr/bin/env python

import numpy as np
import rospy

from audio_common_msgs.msg import AudioData
from jsk_recognition_msgs.msg import Spectrum


# This node executes FFT on audio (audio_common_msgs/AudioData).
# The number of audio channels is assumed to be 1.
# The format of the audio topic is assumed to be wave.

class AudioToSpectrum(object):

    def __init__(self):
        super(AudioToSpectrum, self).__init__()

        # Audio topic config
        # Sampling rate of microphone (namely audio topic).
        mic_sampling_rate = rospy.get_param('~mic_sampling_rate', 16000)
        # Period [s] to sample audio data for one FFT
        fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3)
        # Bits per audio sample
        bitdepth = rospy.get_param('~bitdepth', 16)
        if bitdepth == 16:
            self.dtype = 'int16'
        else:
            rospy.logerr("'~bitdepth' {} is unsupported.".format(bitdepth))
        # Audio topic buffer
        self.audio_buffer = np.array([], dtype=self.dtype)
        # How long audio_buffer should be for one FFT
        self.audio_buffer_len = int(mic_sampling_rate * fft_sampling_period)

        # FFT config: Hamming window over one buffer length
        window_function = np.arange(
            0.0, 1.0, 1.0 / self.audio_buffer_len)
        self.window_function = 0.54 - 0.46 * np.cos(
            2 * np.pi * window_function)
        high_cut_freq = rospy.get_param('~high_cut_freq', 800)
        if high_cut_freq > mic_sampling_rate / 2:
            rospy.logerr('Set high_cut_freq lower than {} Hz'.format(
                mic_sampling_rate / 2))
        low_cut_freq = rospy.get_param('~low_cut_freq', 1)  # remove 0 Hz
        self.freq = np.fft.fftfreq(
            self.audio_buffer_len, d=1./mic_sampling_rate)
        self.cutoff_mask = np.where(
            (low_cut_freq <= self.freq) & (self.freq <= high_cut_freq),
            True, False)
        # How many times FFT is executed in one second
        # fft_exec_rate equals the publish rate [Hz] of the output spectrum
        self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50)

        # Publisher and Subscriber
        rospy.Subscriber(
            '~audio', AudioData, self.audio_cb)
        self.pub_spectrum = rospy.Publisher(
            '~spectrum', Spectrum, queue_size=1)
        self.pub_spectrum_filtered = rospy.Publisher(
            '~spectrum_filtered', Spectrum, queue_size=1)
        rospy.Timer(rospy.Duration(1. / self.fft_exec_rate), self.timer_cb)

    def audio_cb(self, msg):
        # Save audio msg to audio_buffer
        data = msg.data
        audio_buffer = np.frombuffer(data, dtype=self.dtype)
        self.audio_buffer = np.append(
            self.audio_buffer, audio_buffer)
        self.audio_buffer = self.audio_buffer[
            -self.audio_buffer_len:]

    def timer_cb(self, timer):
        if len(self.audio_buffer) != self.audio_buffer_len:
            return
        # Calculate spectrum by FFT
        amplitude = np.fft.fft(self.audio_buffer * self.window_function)
        amplitude = np.log(np.abs(amplitude))
        spectrum_msg = Spectrum()
        spectrum_msg.header.stamp = rospy.Time.now()
        spectrum_msg.amplitude = amplitude
        spectrum_msg.frequency = self.freq
        self.pub_spectrum.publish(spectrum_msg)
        spectrum_msg.amplitude = amplitude[self.cutoff_mask]
        spectrum_msg.frequency = self.freq[self.cutoff_mask]
        self.pub_spectrum_filtered.publish(spectrum_msg)


if __name__ == '__main__':
    rospy.init_node('audio_to_spectrum')
    AudioToSpectrum()
    rospy.spin()
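For intuition about the buffer and frequency-bin arithmetic in `__init__` above, here is a small standalone sketch (not part of this commit) using the launch defaults (`mic_sampling_rate: 16000`, `fft_sampling_period: 0.3`, `low_cut_freq: 1`, `high_cut_freq: 800`).

```python
# Sketch only: reproduce the sizes computed in AudioToSpectrum.__init__.
import numpy as np

mic_sampling_rate = 16000      # Hz, launch default
fft_sampling_period = 0.3      # s, launch default
audio_buffer_len = int(mic_sampling_rate * fft_sampling_period)
print(audio_buffer_len)        # 4800 samples per FFT

freq = np.fft.fftfreq(audio_buffer_len, d=1. / mic_sampling_rate)
print(freq[1])                 # ~3.33 Hz, the width of one frequency bin

cutoff_mask = (1 <= freq) & (freq <= 800)
print(cutoff_mask.sum())       # 240 bins survive the low/high cut
```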
audio_to_spectrogram/scripts/spectrum_plot.py
@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
import rospy

from jsk_recognition_msgs.msg import Spectrum


class SpectrumPlot():

    def __init__(self):
        # Set matplotlib config
        self.fig = plt.figure(figsize=(8, 5))
        self.fig.suptitle('Spectrum plot', size=12)
        self.fig.subplots_adjust(left=0.1, right=0.95, top=0.90, bottom=0.1,
                                 wspace=0.2, hspace=0.6)
        self.ax = self.fig.add_subplot(1, 1, 1)
        self.ax.grid(True)
        self.ax.set_xlabel('Frequency [Hz]', fontsize=12)
        self.ax.set_ylabel('Amplitude', fontsize=12)
        self.line, = self.ax.plot([0, 0], label='Amplitude of Spectrum')
        # ROS subscriber
        self.sub_spectrum = rospy.Subscriber(
            '~spectrum', Spectrum, self._cb, queue_size=1000)

    def _cb(self, msg):
        # Plot spectrum
        self.amp = np.array(msg.amplitude)
        self.freq = np.array(msg.frequency)
        self.line.set_data(self.freq, self.amp)
        self.ax.set_xlim((self.freq.min(), self.freq.max()))
        self.ax.set_ylim((0.0, 20))
        self.ax.legend(loc='upper right')


if __name__ == '__main__':
    rospy.init_node('spectrum_plot')
    SpectrumPlot()
    while not rospy.is_shutdown():
        plt.pause(.1)  # real-time plotting