Skip to content

Commit

Permalink
Patch audio summary to support multiple channels and save faster (#575)
Browse files Browse the repository at this point in the history
* Faster saving using soundfile, support multiple channels

* Support multiple channels, change shape from (c, L) to (L,c)

* fix test and docs

Co-authored-by: Tzu-Wei Huang <[email protected]>
  • Loading branch information
prafullasd and lanpa authored Jun 29, 2020
1 parent eb7e8d2 commit f4a6c73
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 21 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ install:
- pip install tb-nightly
- pip install crc32c
- pip install protobuf==3.8.0
- pip install SoundFile
- conda install ffmpeg
- conda list
- python -c "import imageio; imageio.plugins.ffmpeg.download()"
Expand Down
39 changes: 19 additions & 20 deletions tensorboardX/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,32 +392,31 @@ def make_video(tensor, fps):


def audio(tag, tensor, sample_rate=44100):
    """Build a ``Summary`` protobuf holding one WAV-encoded audio clip.

    Args:
      tag: Name under which the audio value is stored in the summary.
      tensor: A 2-D float Tensor of shape `[frames, channels]` where `channels`
        is 1 or 2. The values should lie in [-1, 1]; anything outside that
        range is clipped and a warning is printed. A 1-D tensor is also
        accepted and treated as single-channel (mono) audio.
      sample_rate: Sample rate in Hz written into the WAV header and the
        summary metadata.

    Returns:
      A ``Summary`` containing a single ``Summary.Value`` whose ``audio``
      field carries the encoded WAV bytes.

    Raises:
      AssertionError: If the (possibly expanded) tensor is not 2-D, or its
        second dimension is neither 1 nor 2.
    """
    # Imported lazily so that soundfile is only required when audio
    # summaries are actually written.
    import io
    import soundfile

    tensor = make_np(tensor)
    if abs(tensor).max() > 1:
        print('warning: audio amplitude out of range, auto clipped.')
        tensor = tensor.clip(-1, 1)
    if tensor.ndim == 1:  # old API, which expects single channel audio
        tensor = np.expand_dims(tensor, axis=1)

    assert(tensor.ndim == 2), 'Input tensor should be 2 dimensional.'
    length_frames, num_channels = tensor.shape
    assert num_channels == 1 or num_channels == 2, 'The second dimension should be 1 or 2.'

    # Encode into an in-memory buffer; the context manager closes the buffer
    # even if soundfile raises while writing.
    with io.BytesIO() as fio:
        soundfile.write(fio, tensor, samplerate=sample_rate, format='wav')
        audio_string = fio.getvalue()

    audio = Summary.Audio(sample_rate=sample_rate,
                          num_channels=num_channels,
                          length_frames=length_frames,
                          encoded_audio_string=audio_string,
                          content_type='audio/wav')
    return Summary(value=[Summary.Value(tag=tag, audio=audio)])
Expand Down
3 changes: 2 additions & 1 deletion tensorboardX/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,8 @@ def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100, wallti
sample_rate (int): sample rate in Hz
walltime (float): Optional override default walltime (time.time()) of event
Shape:
snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
snd_tensor: :math:`(L, C)`. The values should lie between [-1, 1], where `L`
is the number of audio frames and `C` is the number of channels (1 for mono, 2 for stereo).
"""
if self._check_caffe2_blob(snd_tensor):
snd_tensor = workspace.FetchBlob(snd_tensor)
Expand Down

0 comments on commit f4a6c73

Please sign in to comment.