Skip to content

Commit

Permalink
[silero_vad_ros] fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
sktometometo committed Apr 20, 2023
1 parent c16d146 commit 8b91bfb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
2 changes: 1 addition & 1 deletion silero_vad_ros/launch/sample.launch
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
type="speech_to_text.py"
output="log"
>
- <remap from="audio" to="/webrtcvad_ros/speech_audio"/>
+ <remap from="audio" to="/silero_vad_ros/speech_audio"/>
<rosparam subst_value="true">
language: ja-JP
self_cancellation: false
Expand Down
19 changes: 8 additions & 11 deletions silero_vad_ros/node_scripts/vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,18 @@ def __init__(self):
force_reload=True)
self.model_vad = model_vad

- super(SileroVADROS, self).__init__()
+ super(SileroVADROS, self).__init__(chunk_size=1536)

rospy.loginfo('Initialized.')

def _get_vad_confidence(self, chunk, sampling_rate):
- print('chunk: {}'.format(chunk))
- nparray = self._convert_bytearray_to_numpy_array(chunk)
- print('nparray: {}'.format(nparray))
- return self.model_vad(torch.from_numpy(nparray), sampling_rate).item()
-
- def _convert_bytearray_to_numpy_array(self, data):
- if self._audio_info.sample_format == 'S16LE':
- return np.array(struct.unpack("{}h".format(int(len(data) / 2)), data))
- else:
- raise ValueError()
+ audio_chunk = np.frombuffer(chunk, np.int16)
+ abs_max = np.abs(audio_chunk).max()
+ audio_chunk = audio_chunk.astype('float32')
+ if abs_max > 0:
+ audio_chunk *= 1 / 32768
+ audio_chunk = audio_chunk.squeeze()
+ return self.model_vad(torch.from_numpy(audio_chunk), sampling_rate).item()


def main():
Expand Down

0 comments on commit 8b91bfb

Please sign in to comment.