Skip to content

Commit

Permalink
Merge branch 'master' into mask-rcnn-fpn
Browse files Browse the repository at this point in the history
  • Loading branch information
knorth55 authored May 26, 2020
2 parents 1379809 + ccdb666 commit 14964f9
Show file tree
Hide file tree
Showing 136 changed files with 2,417 additions and 272 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ before_script:
- if [[ "$ROS_DISTRO" == "kinetic" && "$DOCKER_IMAGE_JENKINS" == "" ]]; then export export DOCKER_IMAGE_JENKINS='ros-ubuntu:16.04-pcl'; fi
- if [[ "$ROS_DISTRO" == "melodic" && "$DOCKER_IMAGE_JENKINS" == "" ]]; then export export DOCKER_IMAGE_JENKINS='ros-ubuntu:18.04-pcl'; fi
script:
# use https://github.com/ros-infrastructure/rosdep/pull/694 to respect version_lt for python pip, some package requries python3
- export BEFORE_SCRIPT="sudo pip install numpy==1.16.6; $BEFORE_SCRIPT"
- export BEFORE_SCRIPT="sudo pip install fcn chainercv chainer==6.7.0 cupy-cuda91; $BEFORE_SCRIPT"
- if [[ "$ROS_DISTRO" == "kinetic" && "$ROS_DISTRO" == "melodic" ]]; then export BEFORE_SCRIPT="sudo apt-get install -y patchutils; curl -sL https://patch-diff.githubusercontent.com/raw/ros-infrastructure/rosdep/pull/753.diff | sudo patch -d /usr/lib/python2.7/dist-packages/ -p2; curl -sL https://patch-diff.githubusercontent.com/raw/ros-infrastructure/rosdep/pull/694.diff | filterdiff --exclude='a/test/*' | sudo patch -d /usr/lib/python2.7/dist-packages/ -p2; $BEFORE_SCRIPT"; fi
- source .travis/travis.sh
- (cd $TRAVIS_BUILD_DIR/doc && source setup.sh && make html)
after_success:
Expand Down
16 changes: 16 additions & 0 deletions audio_to_spectrogram/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 2.8.3)
project(audio_to_spectrogram)

# catkin provides catkin_package() and CATKIN_PACKAGE_SHARE_DESTINATION;
# REQUIRED makes configuration fail early with a clear message if it is absent
# (without it, the catkin_package() call below would fail obscurely).
find_package(catkin REQUIRED)
catkin_package()

# Install all runtime resources of this (script-only) package.
# USE_SOURCE_PERMISSIONS keeps the Python scripts executable after install.
install(DIRECTORY launch sample scripts test
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
)

if(CATKIN_ENABLE_TESTING)
  find_package(catkin REQUIRED COMPONENTS rostest roslaunch)
  # Run the integration rostest and sanity-check the launch file syntax.
  add_rostest(test/audio_to_spectrogram.test)
  roslaunch_add_file_check(launch/audio_to_spectrogram.launch)
endif()
86 changes: 86 additions & 0 deletions audio_to_spectrogram/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# audio_to_spectrogram

This package converts audio data to spectrum and spectrogram data.

# Usage
By running the following command, you can publish audio, spectrum and spectrogram topics. Please set the correct args for your microphone configuration, such as mic\_sampling\_rate or bitdepth.

```bash
roslaunch audio_to_spectrogram audio_to_spectrogram.launch
```

Here is an example using rosbag with 300Hz audio.
```bash
roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
```

|Spectrum|Spectrogram|
|---|---|
|![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)|

# Scripts

## audio_to_spectrum.py
A script to convert audio to spectrum.

- ### Publishing topics

- `~spectrum` (`jsk_recognition_msgs/Spectrum`)

Spectrum data calculated from audio by FFT.

- ### Subscribing topics
- `audio` (`audio_common_msgs/AudioData`)

Audio stream data from microphone. The audio format must be `wave`.

- ### Parameters
- `~mic_sampling_rate` (`Int`, default: `16000`)

Sampling rate [Hz] of microphone. Namely, sampling rate of audio topic.

- `~fft_sampling_period` (`Double`, default: `0.3`)

Period [s] to sample audio data for one FFT.

- `~bitdepth` (`Int`, default: `16`)

Number of bits per audio sample.

- `~high_cut_freq` (`Int`, default: `800`)

Threshold to limit the maximum frequency of the output spectrum.

- `~low_cut_freq` (`Int`, default: `1`)

Threshold to limit the minimum frequency of the output spectrum.

## spectrum_to_spectrogram.py
A script to convert spectrum to spectrogram.

- ### Publishing topics
- `~spectrogram` (`sensor_msgs/Image`)

Spectrogram data, which is a concatenation of spectra in time series. The image format is 32FC1.

- ### Subscribing topics
- `~spectrum` (`jsk_recognition_msgs/Spectrum`)

Spectrum data calculated from audio by FFT.

- ### Parameters
- `~image_height` (`Int`, default: `300`)

Number of vertical (frequency axis) pixels in output spectrogram.

- `~image_width` (`Int`, default: `300`)

Number of horizontal (time axis) pixels in output spectrogram.

- `~spectrogram_period` (`Double`, default: `5`)

Period [s] to store spectrum data to create one spectrogram topic.

- `~publish_rate` (`Double`, default: `image_width / spectrogram_period`)

Publish rate [Hz] of spectrogram topic.
71 changes: 71 additions & 0 deletions audio_to_spectrogram/launch/audio_to_spectrogram.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
<!-- data flow -->
<!-- audio -> spectrum -> spectrogram -->

<launch>
  <arg name="launch_audio_capture" default="true" />

  <arg name="bitdepth" default="16" />
  <arg name="mic_sampling_rate" default="16000" />
  <arg name="device" default="hw:0,0" />
  <arg name="audio_topic" default="/audio" />
  <arg name="high_cut_freq" default="800" />
  <arg name="low_cut_freq" default="1" />
  <arg name="spectrogram_period" default="5" />
  <arg name="gui" default="true" />

  <!-- Publish audio topic from microphone -->
  <node name="audio_capture" pkg="audio_capture" type="audio_capture"
        if="$(arg launch_audio_capture)"
        respawn="true">
    <rosparam subst_value="true">
      format: wave
      channels: 1
      depth: $(arg bitdepth)
      sample_rate: $(arg mic_sampling_rate)
      device: $(arg device)
    </rosparam>
  </node>

  <!-- convert audio topic to spectrum topic -->
  <node pkg="audio_to_spectrogram" type="audio_to_spectrum.py" name="audio_to_spectrum" respawn="true">
    <remap from="~audio" to="$(arg audio_topic)" />
    <rosparam subst_value="true">
      mic_sampling_rate: $(arg mic_sampling_rate)
      fft_sampling_period: 0.3
      bitdepth: $(arg bitdepth)
      high_cut_freq: $(arg high_cut_freq)
      low_cut_freq: $(arg low_cut_freq)
      fft_exec_rate: 50
    </rosparam>
  </node>

  <!-- convert spectrum topic to spectrogram topic -->
  <node pkg="audio_to_spectrogram" type="spectrum_to_spectrogram.py" name="spectrum_to_spectrogram" respawn="true">
    <remap from="~spectrum" to="/audio_to_spectrum/spectrum_filtered" />
    <rosparam subst_value="true">
      image_height: 300
      image_width: 300
      spectrogram_period: $(arg spectrogram_period)
    </rosparam>
  </node>

  <group if="$(arg gui)">
    <!-- visualize spectrum -->
    <node pkg="audio_to_spectrogram" type="spectrum_plot.py" name="spectrum_plot" >
      <remap from="~spectrum" to="/audio_to_spectrum/spectrum_filtered" />
    </node>

    <!-- visualize spectrogram -->
    <!-- you can get the spectrogram image topic from /spectrogram_view/output -->
    <node pkg="image_view" type="image_view" name="spectrogram_view" >
      <remap from="image" to="/spectrum_to_spectrogram/spectrogram" />
      <!-- NOTE: comments inside <rosparam> must use YAML '#' syntax; XML
           comments would split the element's YAML text content. -->
      <rosparam>
        # scale all pixels to the range [0, 255] before display
        do_dynamic_scaling: true
        # 2 == OpenCV jet colormap
        colormap: 2
      </rosparam>
    </node>
  </group>

</launch>
23 changes: 23 additions & 0 deletions audio_to_spectrogram/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!-- Catkin package manifest (format 1: uses run_depend/test_depend tags). -->
<package>
  <name>audio_to_spectrogram</name>
  <version>0.0.1</version>
  <description>Convert audio (audio_common_msgs/AudioData) to spectrogram (sensor_msgs/Image) </description>

  <maintainer email="[email protected]">Naoya Yamaguchi</maintainer>

  <license>BSD</license>

  <buildtool_depend>catkin</buildtool_depend>

  <!-- Runtime dependencies: audio capture/messages, message types used by the
       scripts, and visualization tools started by the launch files. -->
  <run_depend>audio_capture</run_depend>
  <run_depend>audio_common_msgs</run_depend>
  <run_depend>cv_bridge</run_depend>
  <run_depend>image_view</run_depend>
  <run_depend>jsk_recognition_msgs</run_depend>
  <run_depend>python-matplotlib</run_depend>
  <run_depend>sensor_msgs</run_depend>

  <!-- Test-only dependencies for the rostest/roslaunch checks in CMakeLists. -->
  <test_depend>roslaunch</test_depend>
  <test_depend>rostest</test_depend>

</package>
Binary file added audio_to_spectrogram/sample/data/audio_300hz.bag
Binary file not shown.
19 changes: 19 additions & 0 deletions audio_to_spectrogram/sample/sample_audio_to_spectrogram.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!-- data flow -->
<!-- audio -> spectrum -> spectrogram -->

<!-- Sample: replays a recorded 300 Hz audio bag instead of capturing from a
     microphone, then runs the regular audio_to_spectrogram pipeline on it. -->
<launch>
  <arg name="gui" default="true" />

  <!-- play rosbag -->
  <arg name="filename" default="$(find audio_to_spectrogram)/sample/data/audio_300hz.bag" />
  <!-- use the bag's clock so downstream timestamps match the recording -->
  <param name="use_sim_time" value="true" />
  <node name="rosbag_play"
        pkg="rosbag" type="play"
        args="$(arg filename) --clock --loop"/>

  <!-- audio_capture is disabled because the bag already provides /audio -->
  <include file="$(find audio_to_spectrogram)/launch/audio_to_spectrogram.launch" >
    <arg name="launch_audio_capture" value="false" />
    <arg name="gui" value="$(arg gui)" />
  </include>

</launch>
91 changes: 91 additions & 0 deletions audio_to_spectrogram/scripts/audio_to_spectrum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python

import numpy as np
import rospy

from audio_common_msgs.msg import AudioData
from jsk_recognition_msgs.msg import Spectrum


# This node execute FFT to audio (audio_common_msgs/AudioData)
# The number of audio channel is assumed to be 1.
# The format of audio topic is assumed to be wave.

class AudioToSpectrum(object):
    """Convert an audio stream (audio_common_msgs/AudioData) to a spectrum.

    Subscribes to ``~audio`` (single-channel, wave-format samples), keeps a
    ring buffer of the newest ``~fft_sampling_period`` seconds, and
    periodically publishes the log-amplitude FFT on ``~spectrum`` (full band)
    and ``~spectrum_filtered`` (limited to [~low_cut_freq, ~high_cut_freq]).
    """

    def __init__(self):
        super(AudioToSpectrum, self).__init__()

        # Audio topic config
        # Sampling rate [Hz] of the microphone (i.e. of the audio topic).
        mic_sampling_rate = rospy.get_param('~mic_sampling_rate', 16000)
        # Period [s] of audio data fed into one FFT.
        fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3)
        # Bits per audio sample. Only 16 bit signed samples are supported;
        # fail fast instead of continuing with self.dtype undefined, which
        # previously caused an AttributeError at the first audio message.
        bitdepth = rospy.get_param('~bitdepth', 16)
        if bitdepth == 16:
            self.dtype = 'int16'
        else:
            rospy.logerr("'~bitdepth' {} is unsupported.".format(bitdepth))
            raise ValueError(
                "'~bitdepth' {} is unsupported, use 16.".format(bitdepth))
        # Ring buffer of the newest audio samples.
        self.audio_buffer = np.array([], dtype=self.dtype)
        # Number of samples required for one FFT window.
        self.audio_buffer_len = int(mic_sampling_rate * fft_sampling_period)

        # fft config
        # Hamming window, periodic form: 0.54 - 0.46*cos(2*pi*n/N).
        phase = np.arange(0.0, 1.0, 1.0 / self.audio_buffer_len)
        self.window_function = 0.54 - 0.46 * np.cos(2 * np.pi * phase)
        high_cut_freq = rospy.get_param('~high_cut_freq', 800)
        nyquist_freq = mic_sampling_rate / 2
        if high_cut_freq > nyquist_freq:
            # Frequencies above Nyquist carry no information; clamp instead
            # of silently using an out-of-range cutoff.
            rospy.logerr('Set high_cut_freq lower than {} Hz'.format(
                nyquist_freq))
            high_cut_freq = nyquist_freq
        low_cut_freq = rospy.get_param('~low_cut_freq', 1)  # remove 0 Hz
        self.freq = np.fft.fftfreq(
            self.audio_buffer_len, d=1. / mic_sampling_rate)
        # Boolean mask selecting bins in [low_cut_freq, high_cut_freq].
        self.cutoff_mask = (
            (low_cut_freq <= self.freq) & (self.freq <= high_cut_freq))
        # How many times fft is executed in one second
        # fft_exec_rate equals to output spectrogram hz
        self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50)

        # Publisher and Subscriber
        rospy.Subscriber('~audio', AudioData, self.audio_cb)
        self.pub_spectrum = rospy.Publisher(
            '~spectrum', Spectrum, queue_size=1)
        self.pub_spectrum_filtered = rospy.Publisher(
            '~spectrum_filtered', Spectrum, queue_size=1)
        rospy.Timer(rospy.Duration(1. / self.fft_exec_rate), self.timer_cb)

    def audio_cb(self, msg):
        # Append incoming samples and keep only the newest
        # audio_buffer_len samples (sliding FFT window).
        audio_buffer = np.frombuffer(msg.data, dtype=self.dtype)
        self.audio_buffer = np.append(self.audio_buffer, audio_buffer)
        self.audio_buffer = self.audio_buffer[-self.audio_buffer_len:]

    def timer_cb(self, timer):
        # Wait until the buffer holds exactly one FFT window of samples.
        if len(self.audio_buffer) != self.audio_buffer_len:
            return
        # Windowed FFT; publish the log-amplitude spectrum.
        amplitude = np.fft.fft(self.audio_buffer * self.window_function)
        amplitude = np.log(np.abs(amplitude))
        spectrum_msg = Spectrum()
        spectrum_msg.header.stamp = rospy.Time.now()
        spectrum_msg.amplitude = amplitude
        spectrum_msg.frequency = self.freq
        self.pub_spectrum.publish(spectrum_msg)
        # Re-publish only the band-limited bins on ~spectrum_filtered.
        spectrum_msg.amplitude = amplitude[self.cutoff_mask]
        spectrum_msg.frequency = self.freq[self.cutoff_mask]
        self.pub_spectrum_filtered.publish(spectrum_msg)


if __name__ == '__main__':
    # Start the node; all work happens in the subscriber/timer callbacks.
    rospy.init_node('audio_to_spectrum')
    AudioToSpectrum()
    rospy.spin()
44 changes: 44 additions & 0 deletions audio_to_spectrogram/scripts/spectrum_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
import rospy

from jsk_recognition_msgs.msg import Spectrum


class SpectrumPlot(object):
    """Live matplotlib plot of jsk_recognition_msgs/Spectrum messages.

    Subscribes to ``~spectrum`` and updates a single amplitude-vs-frequency
    line on each message; the actual rendering is driven by plt.pause() in
    the main loop below.
    """

    def __init__(self):
        # Set matplotlib config
        self.fig = plt.figure(figsize=(8, 5))
        self.fig.suptitle('Spectrum plot', size=12)
        self.fig.subplots_adjust(left=0.1, right=0.95, top=0.90, bottom=0.1,
                                 wspace=0.2, hspace=0.6)
        self.ax = self.fig.add_subplot(1, 1, 1)
        self.ax.grid(True)
        self.ax.set_xlabel('Frequency [Hz]', fontsize=12)
        self.ax.set_ylabel('Amplitude', fontsize=12)
        self.line, = self.ax.plot([0, 0], label='Amplitude of Spectrum')
        # Create the legend once here instead of rebuilding it on every
        # incoming message in the callback.
        self.ax.legend(loc='upper right')
        # ROS subscriber
        self.sub_spectrum = rospy.Subscriber(
            '~spectrum', Spectrum, self._cb, queue_size=1000)

    def _cb(self, msg):
        # Update the plotted line with the newest spectrum.
        self.amp = np.array(msg.amplitude)
        self.freq = np.array(msg.frequency)
        if self.amp.size == 0 or self.freq.size == 0:
            # Nothing to plot; .min()/.max() would raise on empty arrays.
            return
        self.line.set_data(self.freq, self.amp)
        if self.freq.min() < self.freq.max():
            # Only rescale when the range is non-degenerate; a single-bin
            # spectrum would make set_xlim fail with min == max.
            self.ax.set_xlim((self.freq.min(), self.freq.max()))
        self.ax.set_ylim((0.0, 20))


if __name__ == '__main__':
    # Start the node; plt.pause() both renders the figure and yields time to
    # the matplotlib event loop while ROS callbacks update the line data.
    rospy.init_node('spectrum_plot')
    SpectrumPlot()
    while not rospy.is_shutdown():
        plt.pause(.1)  # real-time plotting
Loading

0 comments on commit 14964f9

Please sign in to comment.