Skip to content

Commit

Permalink
Fix sampling rate for reading opus files (#158)
Browse files Browse the repository at this point in the history
* Fix sampling rate for non SND files

* Add test for 16000 Hz opus file

* Pass on error

* Update test asset README

* Add comment to code
  • Loading branch information
hagenw authored Jan 3, 2025
1 parent 69b0c97 commit c0b02cb
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 23 deletions.
14 changes: 8 additions & 6 deletions audiofile/core/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def convert(
outfile: str,
offset: float = 0,
duration: float = None,
sampling_rate: int = None,
):
"""Convert any audio/video file to WAV.
Expand All @@ -19,16 +20,17 @@ def convert(
outfile: WAV file name
duration: return only a specified duration in seconds
offset: start reading at offset in seconds
sampling_rate: sampling rate in Hz
"""
try:
# Convert to WAV file with sox
run_sox(infile, outfile, offset, duration)
run_sox(infile, outfile, offset, duration, sampling_rate)
except (FileNotFoundError, subprocess.CalledProcessError):
try:
# Convert to WAV file with ffmpeg
run_ffmpeg(infile, outfile, offset, duration)
except FileNotFoundError:
raise binary_missing_error("ffmpeg")
except subprocess.CalledProcessError:
raise broken_file_error(infile)
run_ffmpeg(infile, outfile, offset, duration, sampling_rate)
except FileNotFoundError as e: # pragma: no cover
raise binary_missing_error("ffmpeg") from e
except subprocess.CalledProcessError as e: # pragma: no cover
raise broken_file_error(infile) from e
12 changes: 11 additions & 1 deletion audiofile/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def read(
""" # noqa: E501
file = audeer.safe_path(file)
sampling_rate = None

# Parse offset and duration values
if (
Expand Down Expand Up @@ -384,7 +385,16 @@ def read(
offset /= sampling_rate
if duration is not None and duration != 0:
duration /= sampling_rate
convert(file, tmpfile, offset, duration)
if sampling_rate is None:
# Infer sampling rate using mediainfo before conversion,
# as ffmpeg ignores the original sampling rate for opus files,
# see:
# * https://trac.ffmpeg.org/ticket/5240
# * https://github.com/audeering/audiofile/issues/157
from audiofile.core.info import sampling_rate as get_sampling_rate

sampling_rate = get_sampling_rate(file)
convert(file, tmpfile, offset, duration, sampling_rate)
signal, sampling_rate = soundfile.read(
tmpfile,
dtype=dtype,
Expand Down
20 changes: 12 additions & 8 deletions audiofile/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,23 @@ def run(shell_command):
return ""


def run_ffmpeg(infile, outfile, offset, duration):
def run_ffmpeg(infile, outfile, offset, duration, sampling_rate):
"""Convert audio file to WAV file."""
cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile]
if duration:
cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, "-t", str(duration), outfile]
else:
cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile]
cmd.insert(-1, "-t")
cmd.insert(-1, str(duration))
if sampling_rate:
cmd.insert(-1, "-ar")
cmd.insert(-1, str(sampling_rate))
run(cmd)


def run_sox(infile, outfile, offset, duration):
def run_sox(infile, outfile, offset, duration, sampling_rate):
"""Convert audio file to WAV file."""
cmd = ["sox", infile, outfile, "trim", str(offset)]
if duration:
cmd = ["sox", infile, outfile, "trim", str(offset), str(duration)]
else:
cmd = ["sox", infile, outfile, "trim", str(offset)]
cmd.append(str(duration))
if sampling_rate:
cmd += ["rate", str(sampling_rate)]
run(cmd)
7 changes: 7 additions & 0 deletions tests/assets/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ Kevin MacLeod (incompetech.com),
licensed under Creative Commons:
[CC-BY-3.0](http://creativecommons.org/licenses/by/3.0/).

We converted the file `gs-16b-1c-44100hz.opus`
(which was wrongly stored with a sampling rate of 48000 Hz)
to `gs-16b-1c-16000hz.opus` using:
```bash
ffmpeg -y -i gs-16b-1c-44100hz.opus -ac 1 -ar 16000 gs-16b-1c-16000hz.opus
```

## Video test files

The folder contains the video file `video.mp4`,
Expand Down
Binary file added tests/assets/gs-16b-1c-16000hz.opus
Binary file not shown.
Binary file removed tests/assets/gs-16b-1c-44100hz.opus
Binary file not shown.
16 changes: 8 additions & 8 deletions tests/test_audiofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,24 +202,24 @@ def test_empty_file(tmpdir, convert, empty_file):
def test_missing_binaries(tmpdir, hide_system_path, empty_file):
expected_error = FileNotFoundError
# Reading file
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
signal, sampling_rate = af.read(empty_file)
# Metadata
with pytest.raises(expected_error, match="mediainfo"):
af.channels(empty_file)
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
af.duration(empty_file)
with pytest.raises(expected_error, match="mediainfo"):
af.duration(empty_file, sloppy=True)
with pytest.raises(expected_error, match="mediainfo"):
af.has_video(empty_file)
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
af.samples(empty_file)
with pytest.raises(expected_error, match="mediainfo"):
af.sampling_rate(empty_file)

# Convert
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
converted_file = str(tmpdir.join("signal-converted.wav"))
af.convert_to_wav(empty_file, converted_file)

Expand Down Expand Up @@ -512,7 +512,7 @@ def test_file_type(tmpdir, file_type, magnitude, sampling_rate, channels):
@pytest.mark.parametrize(
"file, header_duration, audio, video", # header duration as given by mediainfo
[
("gs-16b-1c-44100hz.opus", 15.839, True, False),
("gs-16b-1c-16000hz.opus", 15.839, True, False),
("gs-16b-1c-8000hz.amr", 15.840000, True, False),
("gs-16b-1c-44100hz.m4a", 15.833, True, False),
("gs-16b-1c-44100hz.aac", None, True, False),
Expand Down Expand Up @@ -1243,7 +1243,7 @@ def test_read_duration_and_offset_rounding(
# when reading with sox or ffmpeg

# soundfile
signal, _ = af.read(audio_file, offset=offset, duration=duration)
signal, sampling_rate = af.read(audio_file, offset=offset, duration=duration)
np.testing.assert_allclose(
signal,
np.array(expected, dtype=np.float32),
Expand All @@ -1259,7 +1259,7 @@ def test_read_duration_and_offset_rounding(
# sox
convert_file = str(tmpdir.join("signal-sox.wav"))
try:
af.core.utils.run_sox(audio_file, convert_file, offset, duration)
af.core.utils.run_sox(audio_file, convert_file, offset, duration, sampling_rate)
signal, _ = af.read(convert_file)
np.testing.assert_allclose(
signal,
Expand All @@ -1272,7 +1272,7 @@ def test_read_duration_and_offset_rounding(

# ffmpeg
convert_file = str(tmpdir.join("signal-ffmpeg.wav"))
af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration)
af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration, sampling_rate)
signal, _ = af.read(convert_file)
np.testing.assert_allclose(
signal,
Expand Down

0 comments on commit c0b02cb

Please sign in to comment.