Fix sampling rate for non-SND files

audeering · Dec 17, 2024 · 61a0a12 · 61a0a12
1 parent f32a612
commit 61a0a12
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 20 deletions.
diff --git a/audiofile/core/convert.py b/audiofile/core/convert.py
@@ -11,6 +11,7 @@ def convert(
     outfile: str,
     offset: float = 0,
     duration: float = None,
+    sampling_rate: int = None,
 ):
     """Convert any audio/video file to WAV.
 
@@ -19,16 +20,17 @@ def convert(
         outfile: WAV file name
         duration: return only a specified duration in seconds
         offset: start reading at offset in seconds
+        sampling_rate: sampling rate in Hz
 
     """
     try:
         # Convert to WAV file with sox
-        run_sox(infile, outfile, offset, duration)
+        run_sox(infile, outfile, offset, duration, sampling_rate)
     except (FileNotFoundError, subprocess.CalledProcessError):
         try:
             # Convert to WAV file with ffmpeg
-            run_ffmpeg(infile, outfile, offset, duration)
-        except FileNotFoundError:
+            run_ffmpeg(infile, outfile, offset, duration, sampling_rate)
+        except FileNotFoundError:  # pragma: no cover
             raise binary_missing_error("ffmpeg")
-        except subprocess.CalledProcessError:
+        except subprocess.CalledProcessError:  # pragma: no cover
             raise broken_file_error(infile)
diff --git a/audiofile/core/io.py b/audiofile/core/io.py
@@ -253,6 +253,7 @@ def read(
 
     """  # noqa: E501
     file = audeer.safe_path(file)
+    sampling_rate = None
 
     # Parse offset and duration values
     if (
@@ -386,7 +387,11 @@ def read(
                 offset /= sampling_rate
             if duration is not None and duration != 0:
                 duration /= sampling_rate
-            convert(file, tmpfile, offset, duration)
+            if sampling_rate is None:
+                from audiofile.core.info import sampling_rate as get_sampling_rate
+
+                sampling_rate = get_sampling_rate(file)
+            convert(file, tmpfile, offset, duration, sampling_rate)
             signal, sampling_rate = soundfile.read(
                 tmpfile,
                 dtype=dtype,

diff --git a/audiofile/core/utils.py b/audiofile/core/utils.py
@@ -90,19 +90,23 @@ def run(shell_command):
         return ""
 
 
-def run_ffmpeg(infile, outfile, offset, duration):
+def run_ffmpeg(infile, outfile, offset, duration, sampling_rate):
     """Convert audio file to WAV file."""
+    cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile]
     if duration:
-        cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, "-t", str(duration), outfile]
-    else:
-        cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile]
+        cmd.insert(-1, "-t")
+        cmd.insert(-1, str(duration))
+    if sampling_rate:
+        cmd.insert(-1, "-ar")
+        cmd.insert(-1, str(sampling_rate))
     run(cmd)
 
 
-def run_sox(infile, outfile, offset, duration):
+def run_sox(infile, outfile, offset, duration, sampling_rate):
     """Convert audio file to WAV file."""
+    cmd = ["sox", infile, outfile, "trim", str(offset)]
     if duration:
-        cmd = ["sox", infile, outfile, "trim", str(offset), str(duration)]
-    else:
-        cmd = ["sox", infile, outfile, "trim", str(offset)]
+        cmd.append(str(duration))
+    if sampling_rate:
+        cmd += ["rate", str(sampling_rate)]
     run(cmd)
diff --git a/tests/test_audiofile.py b/tests/test_audiofile.py
@@ -202,24 +202,24 @@ def test_empty_file(tmpdir, convert, empty_file):
 def test_missing_binaries(tmpdir, hide_system_path, empty_file):
     expected_error = FileNotFoundError
     # Reading file
-    with pytest.raises(expected_error, match="ffmpeg"):
+    with pytest.raises(expected_error, match="mediainfo"):
         signal, sampling_rate = af.read(empty_file)
     # Metadata
     with pytest.raises(expected_error, match="mediainfo"):
         af.channels(empty_file)
-    with pytest.raises(expected_error, match="ffmpeg"):
+    with pytest.raises(expected_error, match="mediainfo"):
         af.duration(empty_file)
     with pytest.raises(expected_error, match="mediainfo"):
         af.duration(empty_file, sloppy=True)
     with pytest.raises(expected_error, match="mediainfo"):
         af.has_video(empty_file)
-    with pytest.raises(expected_error, match="ffmpeg"):
+    with pytest.raises(expected_error, match="mediainfo"):
         af.samples(empty_file)
     with pytest.raises(expected_error, match="mediainfo"):
         af.sampling_rate(empty_file)
 
     # Convert
-    with pytest.raises(expected_error, match="ffmpeg"):
+    with pytest.raises(expected_error, match="mediainfo"):
         converted_file = str(tmpdir.join("signal-converted.wav"))
         af.convert_to_wav(empty_file, converted_file)
 
@@ -1243,7 +1243,7 @@ def test_read_duration_and_offset_rounding(
     # when reading with sox or ffmpeg
 
     # soundfile
-    signal, _ = af.read(audio_file, offset=offset, duration=duration)
+    signal, sampling_rate = af.read(audio_file, offset=offset, duration=duration)
     np.testing.assert_allclose(
         signal,
         np.array(expected, dtype=np.float32),
@@ -1259,7 +1259,7 @@ def test_read_duration_and_offset_rounding(
     # sox
     convert_file = str(tmpdir.join("signal-sox.wav"))
     try:
-        af.core.utils.run_sox(audio_file, convert_file, offset, duration)
+        af.core.utils.run_sox(audio_file, convert_file, offset, duration, sampling_rate)
         signal, _ = af.read(convert_file)
         np.testing.assert_allclose(
             signal,
@@ -1272,7 +1272,7 @@ def test_read_duration_and_offset_rounding(
 
     # ffmpeg
     convert_file = str(tmpdir.join("signal-ffmpeg.wav"))
-    af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration)
+    af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration, sampling_rate)
     signal, _ = af.read(convert_file)
     np.testing.assert_allclose(
         signal,