-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
2,452 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -169,5 +169,4 @@ Info.plist | |
/*.wav | ||
/*.flac | ||
output/ | ||
*.exe | ||
fish_audio_preprocess | ||
*.exe |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import click | ||
import richuru | ||
from loguru import logger | ||
|
||
from fish_audio_preprocess.cli.merge_lab import merge_lab | ||
|
||
from .convert_to_wav import to_wav | ||
from .frequency import frequency | ||
from .length import length | ||
from .loudness_norm import loudness_norm | ||
from .merge_short import merge_short | ||
from .resample import resample | ||
from .separate_audio import separate | ||
from .slice_audio import slice_audio, slice_audio_v2, slice_audio_v3 | ||
from .transcribe import transcribe | ||
|
||
|
||
@click.group()
@click.option("--debug/--no-debug", default=False)
def cli(debug: bool):
    """An audio preprocessing CLI."""

    # Nothing to set up unless the user explicitly asked for --debug.
    if not debug:
        return

    # Install richuru first so the confirmation message below is emitted
    # after the debug logging setup is in place.
    richuru.install()
    logger.info("Debug mode is on")
|
||
|
||
# Register subcommands (registration order matches the original sequence of
# add_command calls).
for _subcommand in (
    length,
    frequency,
    to_wav,
    separate,
    loudness_norm,
    slice_audio,
    slice_audio_v2,
    slice_audio_v3,
    resample,
    transcribe,
    merge_short,
    merge_lab,
):
    cli.add_command(_subcommand)


# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import subprocess as sp | ||
from pathlib import Path | ||
|
||
import click | ||
from loguru import logger | ||
from tqdm import tqdm | ||
|
||
from fish_audio_preprocess.utils.file import ( | ||
AUDIO_EXTENSIONS, | ||
VIDEO_EXTENSIONS, | ||
list_files, | ||
make_dirs, | ||
) | ||
|
||
|
||
@click.command()
@click.argument("input_dir", type=click.Path(exists=True, file_okay=False))
@click.argument("output_dir", type=click.Path(exists=False, file_okay=False))
@click.option("--recursive/--no-recursive", default=True, help="Search recursively")
@click.option(
    "--overwrite/--no-overwrite", default=False, help="Overwrite existing files"
)
@click.option(
    "--clean/--no-clean", default=False, help="Clean output directory before processing"
)
@click.option(
    "--segment",
    help="Maximum segment length in seconds, use 0 to disable",
    default=60 * 30,
    show_default=True,
)
def to_wav(
    input_dir: str,
    output_dir: str,
    recursive: bool,
    overwrite: bool,
    clean: bool,
    segment: int,
):
    """Converts all audio and video files in input_dir to wav files in output_dir."""
    # NOTE: the docstring above doubles as the click help text, so the
    # detailed contract is documented here instead.
    #
    # input_dir:  existing directory that is scanned for audio/video files.
    # output_dir: directory receiving the wav files; the layout relative to
    #             input_dir is mirrored underneath it.
    # recursive:  forwarded to list_files.
    # overwrite:  when False, a file whose (first) output already exists is
    #             skipped and counted in the final "Skipped" figure.
    # clean:      forwarded to make_dirs (presumably wipes output_dir first --
    #             confirm against fish_audio_preprocess.utils.file).
    # segment:    when > 0, ffmpeg's segment muxer splits the output into
    #             "<stem>_NNNN.wav" chunks of at most this many seconds;
    #             otherwise a single "<stem>.wav" is written.
    #
    # Raises subprocess.CalledProcessError when ffmpeg exits with a non-zero
    # status (via sp.check_call).

    input_dir, output_dir = Path(input_dir), Path(output_dir)

    # Compare resolved paths so that different spellings of the same directory
    # (relative vs. absolute, "..", symlinks) are still caught before cleaning.
    if clean and input_dir.resolve() == output_dir.resolve():
        logger.error("You are trying to clean the input directory, aborting")
        return

    make_dirs(output_dir, clean)

    files = list_files(
        input_dir, extensions=VIDEO_EXTENSIONS | AUDIO_EXTENSIONS, recursive=recursive
    )
    logger.info(f"Found {len(files)} files, converting to wav")

    segmented = segment > 0
    skipped = 0
    for file in tqdm(files):
        # Mirror the file's location relative to input_dir under output_dir.
        relative_path = file.relative_to(input_dir)
        target_dir = output_dir / relative_path.parent

        # Use the stem rather than str.replace(suffix, ...): replace() would
        # also rewrite earlier occurrences of the suffix text inside the name
        # (e.g. "song.mp3.mp3").
        stem = relative_path.stem

        if segmented:
            # Only the first segment is probed to decide whether to skip.
            check_path = target_dir / f"{stem}_0000.wav"
            # The segment muxer expands printf-style patterns over the whole
            # output path, so literal '%' characters must be doubled.
            output_arg = str(target_dir / stem).replace("%", "%%") + "_%04d.wav"
        else:
            check_path = target_dir / f"{stem}.wav"
            output_arg = str(check_path)

        # exist_ok avoids the check-then-create race of a separate exists() test.
        target_dir.mkdir(parents=True, exist_ok=True)

        if check_path.exists() and not overwrite:
            skipped += 1
            continue

        command = ["ffmpeg", "-y", "-nostats", "-loglevel", "error", "-i", str(file)]

        if segmented:
            command.extend(["-f", "segment", "-segment_time", str(segment)])

        command.append(output_arg)

        sp.check_call(command)

    logger.info("Done!")
    logger.info(f"Total: {len(files)}, Skipped: {skipped}")
    logger.info(f"Output directory: {output_dir}")


if __name__ == "__main__":
    to_wav()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import os | ||
from collections import Counter | ||
from concurrent.futures import ProcessPoolExecutor, as_completed | ||
from pathlib import Path | ||
from typing import Union | ||
|
||
import click | ||
import numpy as np | ||
from loguru import logger | ||
from tqdm import tqdm | ||
|
||
from fish_audio_preprocess.utils.file import list_files | ||
|
||
|
||
def count_notes_from_file(file: Union[Path, str]) -> Counter:
    """Tally the musical notes detected in a single audio file.

    Args:
        file (Union[Path, str]): Path of the audio file to analyse

    Returns:
        Counter: Maps each note name to the number of pitch frames
            that were converted to it
    """

    # Imported lazily inside the function, as in the rest of this module.
    import librosa
    import parselmouth as pm

    sound = pm.Sound(str(file))
    pitch = sound.to_pitch_ac(
        voicing_threshold=0.6,
        pitch_floor=40.0,
        pitch_ceiling=1600.0,
    )
    frequencies = pitch.selected_array["frequency"]

    # Frames whose frequency is 0, NaN or infinite are skipped; every
    # remaining frame contributes one count to its note.
    return Counter(
        librosa.hz_to_note(hz)
        for hz in frequencies
        if np.isfinite(hz) and hz != 0
    )
|
||
|
||
@click.command()
@click.argument("input_dir", type=click.Path(exists=True, file_okay=False))
@click.option("--recursive/--no-recursive", default=True, help="Search recursively")
@click.option(
    "--visualize/--no-visualize", default=True, help="Visualize the distribution"
)
@click.option(
    "--num-workers",
    # Explicit type: if os.cpu_count() returns None, click could not infer
    # an integer type from the default.
    type=int,
    default=os.cpu_count(),
    help="Number of workers for parallel processing",
)
def frequency(
    input_dir: str,
    recursive: bool,
    visualize: bool,
    num_workers: int,
):
    """
    Get the frequency of all audio files in a directory
    """
    # NOTE: the docstring above doubles as the click help text, so the
    # detailed contract is documented here instead.
    #
    # input_dir:   existing directory scanned for ".wav" files.
    # recursive:   forwarded to list_files.
    # visualize:   when True, show a bar chart of the note distribution
    #              after logging the per-note counts.
    # num_workers: max_workers of the ProcessPoolExecutor that runs
    #              count_notes_from_file for each file.
    #
    # Any exception raised in a worker is re-raised here unchanged.

    # Import plotting dependencies up front (fail fast), but only when a plot
    # was actually requested so --no-visualize does not need matplotlib.
    if visualize:
        import librosa
        from matplotlib import pyplot as plt

    input_dir = Path(input_dir)
    files = list_files(input_dir, {".wav"}, recursive=recursive)
    logger.info(f"Found {len(files)} files, calculating frequency")

    counter = Counter()

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        tasks = [
            executor.submit(count_notes_from_file, file)
            for file in tqdm(files, desc="Preparing")
        ]
        for future in tqdm(
            as_completed(tasks), desc="Collecting infos", total=len(tasks)
        ):
            # result() re-raises a worker failure with its original type; an
            # assert would be stripped under "python -O" and hide that type.
            counter += future.result()

    # Log notes from most to least frequent.
    for note, count in counter.most_common():
        logger.info(f"{note}: {count}")

    if not visualize:
        return

    # Order the x axis by pitch. Build the note -> position table once
    # instead of scanning a list for every note in the sort key.
    note_rank = {
        str(note): rank
        for rank, note in enumerate(librosa.midi_to_note(list(range(300))))
    }
    data = sorted(counter.items(), key=lambda kv: note_rank[kv[0]])
    notes = [note for note, _ in data]
    counts = [count for _, count in data]

    plt.rcParams["figure.figsize"] = [10, 4]
    plt.rcParams["figure.autolayout"] = True
    plt.bar(notes, counts)
    plt.xticks(rotation=90)
    plt.title("Notes distribution")
    plt.xlabel("Notes")
    plt.ylabel("Count")

    # Add grid to the plot
    plt.grid(axis="y", alpha=0.75)
    plt.grid(axis="x", alpha=0.75)

    # Add percentage to the plot; notes below 0.1% of the total are left
    # unlabeled.
    total = sum(counts)
    for position, count in enumerate(counts):
        if count / total < 0.001:
            continue

        plt.text(
            position - 1,
            count + 1,
            f"{count / total * 100:.2f}%",
            color="black",
            fontweight="bold",
        )

    plt.show()


if __name__ == "__main__":
    frequency()
Oops, something went wrong.