From 7ae06d78ebe80490ba0cc3b6b6f532899cad554b Mon Sep 17 00:00:00 2001
From: Feiteng
Date: Wed, 24 Jan 2024 15:35:57 +0800
Subject: [PATCH 1/2] Init pitch test

---
 kaldifeat/python/tests/CMakeLists.txt |  1 +
 kaldifeat/python/tests/test_pitch.py  | 46 +++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100755 kaldifeat/python/tests/test_pitch.py

diff --git a/kaldifeat/python/tests/CMakeLists.txt b/kaldifeat/python/tests/CMakeLists.txt
index 4ccc891..2d6151a 100644
--- a/kaldifeat/python/tests/CMakeLists.txt
+++ b/kaldifeat/python/tests/CMakeLists.txt
@@ -23,6 +23,7 @@ set(py_test_files
   test_mel_bank_options.py
   test_mfcc.py
   test_mfcc_options.py
+  test_pitch.py
   test_plp.py
   test_plp_options.py
   test_spectrogram.py
diff --git a/kaldifeat/python/tests/test_pitch.py b/kaldifeat/python/tests/test_pitch.py
new file mode 100755
index 0000000..56d60a8
--- /dev/null
+++ b/kaldifeat/python/tests/test_pitch.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2022 Xiaomi Corporation (authors: Fangjun Kuang)
+
+import pickle
+from pathlib import Path
+
+import torch
+from utils import get_devices, read_ark_txt, read_wave
+
+import kaldifeat
+
+cur_dir = Path(__file__).resolve().parent
+
+
+def test_pitch_default():
+    """Compare default-option pitch features with the stored reference.
+
+    For every device from ``get_devices()`` the wave is passed once as
+    loaded and once after ``wave.to(device)``; both outputs must match
+    ``test_data/test-pitch.txt`` within ``rtol=1e-4``.
+    """
+    print("=====test_pitch_default=====")
+    filename = cur_dir / "test_data/test.wav"
+    wave = read_wave(filename)
+    gt = read_ark_txt(cur_dir / "test_data/test-pitch.txt")
+
+    for device in get_devices():
+        print("device", device)
+        # The PitchOptions binding exposes no `frame_opts` attribute (and
+        # so no dither setting); only the device needs to be configured.
+        opts = kaldifeat.PitchOptions()
+        opts.device = device
+        pitch = kaldifeat.Pitch(opts)
+
+        features = pitch(wave)
+        assert features.device.type == "cpu"
+        assert torch.allclose(features, gt, rtol=1e-4)
+
+        features = pitch(wave.to(device))
+        assert features.device == device
+        assert torch.allclose(features.cpu(), gt, rtol=1e-4)
+
+
+if __name__ == "__main__":
+    test_pitch_default()

From 5fca61b1d5690d5470a2d20200c0cb67fd17d304 Mon Sep 17 00:00:00 2001
From: Feiteng
Date: Wed, 24 Jan 2024 17:36:25 +0800
Subject: [PATCH 2/2] Init pitch functions

---
Review notes (ignored by git-am):
 * No hunk in this series calls PybindPitchFunctions(); it still has to be
   invoked from the module initialisation for `_kaldifeat.PitchOptions` and
   `_kaldifeat.Pitch` to exist.
 * PitchComputer and operator<< are only declared; definitions are pending
   in kaldifeat/csrc/pitch-functions.cc.
 * Blob ids on the `index` lines predate the review edits.

 .gitignore                               |   1 +
 kaldifeat/csrc/CMakeLists.txt            |   1 +
 kaldifeat/csrc/pitch-functions.cc        |  14 +++
 kaldifeat/csrc/pitch-functions.h         |  44 +++++++-
 kaldifeat/python/csrc/CMakeLists.txt     |   1 +
 kaldifeat/python/csrc/pitch-functions.cc | 139 +++++++++++++++++++++++
 kaldifeat/python/csrc/pitch-functions.h  |  17 +++
 kaldifeat/python/csrc/utils.cc           |  67 +++++++++++
 kaldifeat/python/csrc/utils.h            |   7 ++
 kaldifeat/python/kaldifeat/__init__.py   |   2 +
 kaldifeat/python/kaldifeat/pitch.py      |  20 ++++
 11 files changed, 311 insertions(+), 2 deletions(-)
 create mode 100644 kaldifeat/csrc/pitch-functions.cc
 create mode 100644 kaldifeat/python/csrc/pitch-functions.cc
 create mode 100644 kaldifeat/python/csrc/pitch-functions.h
 create mode 100644 kaldifeat/python/kaldifeat/pitch.py

diff --git a/.gitignore b/.gitignore
index d6c034b..c067640 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ __pycache__/
 test-1hour.wav
 path.sh
 torch_version.py
+*.DS_Store
diff --git a/kaldifeat/csrc/CMakeLists.txt b/kaldifeat/csrc/CMakeLists.txt
index f95dca8..8823779 100644
--- a/kaldifeat/csrc/CMakeLists.txt
+++ b/kaldifeat/csrc/CMakeLists.txt
@@ -7,6 +7,7 @@ set(kaldifeat_srcs
   feature-plp.cc
   feature-spectrogram.cc
   feature-window.cc
+  pitch-functions.cc
   matrix-functions.cc
   mel-computations.cc
   online-feature.cc
diff --git a/kaldifeat/csrc/pitch-functions.cc b/kaldifeat/csrc/pitch-functions.cc
new file mode 100644
index 0000000..ab22150
--- /dev/null
+++ b/kaldifeat/csrc/pitch-functions.cc
@@ -0,0 +1,14 @@
+// kaldifeat/csrc/pitch-functions.cc
+//
+// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
+
+// This file is copied/modified from kaldi/src/feat/pitch-functions.cc
+
+#include "kaldifeat/csrc/pitch-functions.h"
+
+#include <cmath>
+
+namespace kaldifeat {
+// TODO: PitchComputer and operator<< (declared in the header) are not
+// defined yet; their implementations belong here.
+}  // namespace kaldifeat
diff --git a/kaldifeat/csrc/pitch-functions.h b/kaldifeat/csrc/pitch-functions.h
index 32b50dd..c789633 100644
--- a/kaldifeat/csrc/pitch-functions.h
+++ b/kaldifeat/csrc/pitch-functions.h
@@ -20,7 +20,8 @@
 
 #include <string>
 
-#include "torch/script.h"
+#include "kaldifeat/csrc/feature-common.h"
+#include "kaldifeat/csrc/feature-window.h"
 
 namespace kaldifeat {
 
@@ -144,10 +145,49 @@ struct PitchExtractionOptions {
     os << "nccf_ballast_online: " << nccf_ballast_online << "\n";
     os << "snip_edges: " << snip_edges << "\n";
     os << "device: " << device << "\n";
+    return os.str();
   }
 };
 
-// TODO(fangjun): Implement it
+
+std::ostream &operator<<(std::ostream &os, const PitchExtractionOptions &opts);
+
+class PitchComputer {
+ public:
+  using Options = PitchExtractionOptions;
+
+  explicit PitchComputer(const PitchExtractionOptions &opts);
+  ~PitchComputer();
+
+  PitchComputer &operator=(const PitchComputer &) = delete;
+  PitchComputer(const PitchComputer &) = delete;
+
+  // Number of values produced per frame; fixed at 2 here
+  // (presumably Kaldi's NCCF/pitch pair -- TODO confirm).
+  int32_t Dim() const { return 2; }
+
+  // Declared only: no definition exists yet.
+  // TODO: the options exposed for pitch show no use_energy/raw_energy
+  // fields, so the fbank-style inline body cannot be reused.
+  // if true, compute log_energy_pre_window but after dithering and dc removal
+  bool NeedRawLogEnergy() const;
+
+  // Declared only: no definition exists yet.
+  // TODO: the options exposed for pitch show no frame_opts member, so the
+  // returned FrameExtractionOptions has to be built by the implementation.
+  // const-qualified like the other accessors of this class.
+  const FrameExtractionOptions &GetFrameOptions() const;
+
+  const PitchExtractionOptions &GetOptions() const { return opts_; }
+
+  torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
+                        const torch::Tensor &signal_frame);
+
+ private:
+  PitchExtractionOptions opts_;
+};
+
+using Pitch = OfflineFeatureTpl<PitchComputer>;
 
 }  // namespace kaldifeat
 
diff --git a/kaldifeat/python/csrc/CMakeLists.txt b/kaldifeat/python/csrc/CMakeLists.txt
index cf3bbae..a8443d9 100644
--- a/kaldifeat/python/csrc/CMakeLists.txt
+++ b/kaldifeat/python/csrc/CMakeLists.txt
@@ -5,6 +5,7 @@ pybind11_add_module(_kaldifeat
   feature-plp.cc
   feature-spectrogram.cc
   feature-window.cc
+  pitch-functions.cc
   kaldifeat.cc
   mel-computations.cc
   online-feature.cc
diff --git a/kaldifeat/python/csrc/pitch-functions.cc b/kaldifeat/python/csrc/pitch-functions.cc
new file mode 100644
index 0000000..76b48ce
--- /dev/null
+++ b/kaldifeat/python/csrc/pitch-functions.cc
@@ -0,0 +1,139 @@
+// kaldifeat/python/csrc/pitch-functions.cc
+//
+// Copyright (c) 2024 (authors: Feiteng Li)
+
+#include "kaldifeat/python/csrc/pitch-functions.h"
+
+#include <memory>
+#include <string>
+
+#include "kaldifeat/csrc/pitch-functions.h"
+#include "kaldifeat/python/csrc/utils.h"
+
+namespace kaldifeat {
+
+// Exposes PitchExtractionOptions to Python as `PitchOptions`: keyword
+// constructor, read/write fields, `device` property, dict round-trip and
+// pickling.
+static void PybindPitchOptions(py::module &m) {
+  using PyClass = PitchExtractionOptions;
+  py::class_<PyClass>(m, "PitchOptions")
+      .def(py::init<>())
+      .def(py::init([](float samp_freq = 16000,
+                       float frame_shift_ms = 10.0, float frame_length_ms = 25.0, float preemph_coeff = 0.0,
+                       float min_f0 = 50, float max_f0 = 400, float soft_min_f0 = 10.0,
+                       float penalty_factor = 0.1, float lowpass_cutoff = 1000, float resample_freq = 4000,
+                       float delta_pitch = 0.005, float nccf_ballast = 7000,
+                       int32_t lowpass_filter_width = 1, int32_t upsample_filter_width = 5,
+                       int32_t max_frames_latency = 0, int32_t frames_per_chunk = 0,
+                       bool simulate_first_pass_online = false, int32_t recompute_frame = 500,
+                       bool nccf_ballast_online = false, bool snip_edges = true,
+                       py::object device =
+                           py::str("cpu")) -> std::unique_ptr<PitchExtractionOptions> {
+             auto opts = std::make_unique<PitchExtractionOptions>();
+             opts->samp_freq = samp_freq;
+             opts->frame_shift_ms = frame_shift_ms;
+             opts->frame_length_ms = frame_length_ms;
+             opts->preemph_coeff = preemph_coeff;
+             opts->min_f0 = min_f0;
+             opts->max_f0 = max_f0;
+             opts->soft_min_f0 = soft_min_f0;
+             opts->penalty_factor = penalty_factor;
+             opts->lowpass_cutoff = lowpass_cutoff;
+             opts->resample_freq = resample_freq;
+             opts->delta_pitch = delta_pitch;
+             opts->nccf_ballast = nccf_ballast;
+             opts->lowpass_filter_width = lowpass_filter_width;
+             opts->upsample_filter_width = upsample_filter_width;
+             opts->max_frames_latency = max_frames_latency;
+             opts->frames_per_chunk = frames_per_chunk;
+             opts->simulate_first_pass_online = simulate_first_pass_online;
+             opts->recompute_frame = recompute_frame;
+             opts->nccf_ballast_online = nccf_ballast_online;
+             opts->snip_edges = snip_edges;
+
+             // str(device) lets callers pass either a string or a torch.device.
+             std::string s = static_cast<py::str>(device);
+             opts->device = torch::Device(s);
+
+             return opts;
+           }),
+           py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0,
+           py::arg("frame_length_ms") = 25.0, py::arg("preemph_coeff") = 0.0,
+           py::arg("min_f0") = 50, py::arg("max_f0") = 400, py::arg("soft_min_f0") = 10.0,
+           py::arg("penalty_factor") = 0.1, py::arg("lowpass_cutoff") = 1000, py::arg("resample_freq") = 4000,
+           py::arg("delta_pitch") = 0.005, py::arg("nccf_ballast") = 7000,
+           py::arg("lowpass_filter_width") = 1, py::arg("upsample_filter_width") = 5,
+           py::arg("max_frames_latency") = 0, py::arg("frames_per_chunk") = 0,
+           py::arg("simulate_first_pass_online") = false, py::arg("recompute_frame") = 500,
+           py::arg("nccf_ballast_online") = false, py::arg("snip_edges") = true,
+           py::arg("device") = py::str("cpu"))
+
+      .def_readwrite("samp_freq", &PyClass::samp_freq)
+      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
+      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
+      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
+      .def_readwrite("min_f0", &PyClass::min_f0)
+      .def_readwrite("max_f0", &PyClass::max_f0)
+      .def_readwrite("soft_min_f0", &PyClass::soft_min_f0)
+      .def_readwrite("penalty_factor", &PyClass::penalty_factor)
+      .def_readwrite("lowpass_cutoff", &PyClass::lowpass_cutoff)
+      .def_readwrite("resample_freq", &PyClass::resample_freq)
+      .def_readwrite("delta_pitch", &PyClass::delta_pitch)
+      .def_readwrite("nccf_ballast", &PyClass::nccf_ballast)
+      .def_readwrite("lowpass_filter_width", &PyClass::lowpass_filter_width)
+      .def_readwrite("upsample_filter_width", &PyClass::upsample_filter_width)
+      .def_readwrite("max_frames_latency", &PyClass::max_frames_latency)
+      .def_readwrite("frames_per_chunk", &PyClass::frames_per_chunk)
+      .def_readwrite("simulate_first_pass_online",
+                     &PyClass::simulate_first_pass_online)
+      .def_readwrite("recompute_frame", &PyClass::recompute_frame)
+      .def_readwrite("nccf_ballast_online", &PyClass::nccf_ballast_online)
+      .def_readwrite("snip_edges", &PyClass::snip_edges)
+      .def_property(
+          "device",
+          [](const PyClass &self) -> py::object {
+            py::object ans = py::module_::import("torch").attr("device");
+            return ans(self.device.str());
+          },
+          [](PyClass &self, py::object obj) -> void {
+            std::string s = static_cast<py::str>(obj);
+            self.device = torch::Device(s);
+          })
+      .def("__str__",
+           [](const PyClass &self) -> std::string { return self.ToString(); })
+      .def("as_dict",
+           [](const PyClass &self) -> py::dict { return AsDict(self); })
+      .def_static(
+          "from_dict",
+          [](py::dict dict) -> PyClass { return PitchOptionsFromDict(dict); })
+      .def(py::pickle(
+          [](const PyClass &self) -> py::dict { return AsDict(self); },
+          [](py::dict dict) -> PyClass { return PitchOptionsFromDict(dict); }));
+}
+
+// Exposes the offline `Pitch` extractor; it pickles through its options.
+static void PybindPitch(py::module &m) {
+  using PyClass = Pitch;
+  py::class_<PyClass>(m, "Pitch")
+      .def(py::init<const PitchExtractionOptions &>(), py::arg("opts"))
+      .def("dim", &PyClass::Dim)
+      .def_property_readonly("options", &PyClass::GetOptions)
+      .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
+           py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
+      .def(py::pickle(
+          [](const PyClass &self) -> py::dict {
+            return AsDict(self.GetOptions());
+          },
+          [](py::dict dict) -> std::unique_ptr<PyClass> {
+            return std::make_unique<PyClass>(PitchOptionsFromDict(dict));
+          }));
+}
+
+// Entry point: registers both classes above on module `m`.
+void PybindPitchFunctions(py::module &m) {
+  PybindPitchOptions(m);
+  PybindPitch(m);
+}
+
+}  // namespace kaldifeat
diff --git a/kaldifeat/python/csrc/pitch-functions.h b/kaldifeat/python/csrc/pitch-functions.h
new file mode 100644
index 0000000..53f531e
--- /dev/null
+++ b/kaldifeat/python/csrc/pitch-functions.h
@@ -0,0 +1,17 @@
+// kaldifeat/python/csrc/pitch-functions.h
+//
+// Copyright (c) 2024 (authors: Feiteng Li)
+
+#ifndef KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
+#define KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
+
+#include "kaldifeat/python/csrc/kaldifeat.h"
+
+namespace kaldifeat {
+
+// Registers the `PitchOptions` and `Pitch` classes on module `m`.
+void PybindPitchFunctions(py::module &m);
+
+}  // namespace kaldifeat
+
+#endif  // KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
diff --git a/kaldifeat/python/csrc/utils.cc b/kaldifeat/python/csrc/utils.cc
index f7a6fc2..5817fef 100644
--- a/kaldifeat/python/csrc/utils.cc
+++ b/kaldifeat/python/csrc/utils.cc
@@ -152,6 +152,73 @@ py::dict AsDict(const WhisperFbankOptions &opts) {
   return dict;
 }
 
+
+// Builds a PitchOptions from `dict` (the layout produced by AsDict below).
+// "device" is optional; when absent the default-constructed device is kept.
+PitchOptions PitchOptionsFromDict(py::dict dict) {
+  PitchOptions opts;
+
+  FROM_DICT(float_, samp_freq);
+  FROM_DICT(float_, frame_shift_ms);
+  FROM_DICT(float_, frame_length_ms);
+  FROM_DICT(float_, preemph_coeff);
+  FROM_DICT(float_, min_f0);
+  FROM_DICT(float_, max_f0);
+  FROM_DICT(float_, soft_min_f0);
+  FROM_DICT(float_, penalty_factor);
+  FROM_DICT(float_, lowpass_cutoff);
+  FROM_DICT(float_, resample_freq);
+  FROM_DICT(float_, delta_pitch);
+  FROM_DICT(float_, nccf_ballast);
+  FROM_DICT(int_, lowpass_filter_width);
+  FROM_DICT(int_, upsample_filter_width);
+  FROM_DICT(int_, max_frames_latency);
+  FROM_DICT(int_, frames_per_chunk);
+  FROM_DICT(bool_, simulate_first_pass_online);
+  FROM_DICT(int_, recompute_frame);
+  FROM_DICT(bool_, nccf_ballast_online);
+  FROM_DICT(bool_, snip_edges);
+
+  if (dict.contains("device")) {
+    opts.device = torch::Device(std::string(py::str(dict["device"])));
+  }
+
+  return opts;
+}
+
+// Exports every field of `opts` through AS_DICT and stores "device" as a
+// torch.device object.
+py::dict AsDict(const PitchOptions &opts) {
+  py::dict dict;
+
+  AS_DICT(samp_freq);
+  AS_DICT(frame_shift_ms);
+  AS_DICT(frame_length_ms);
+  AS_DICT(preemph_coeff);
+  AS_DICT(min_f0);
+  AS_DICT(max_f0);
+  AS_DICT(soft_min_f0);
+  AS_DICT(penalty_factor);
+  AS_DICT(lowpass_cutoff);
+  AS_DICT(resample_freq);
+  AS_DICT(delta_pitch);
+  AS_DICT(nccf_ballast);
+  AS_DICT(lowpass_filter_width);
+  AS_DICT(upsample_filter_width);
+  AS_DICT(max_frames_latency);
+  AS_DICT(frames_per_chunk);
+  AS_DICT(simulate_first_pass_online);
+  AS_DICT(recompute_frame);
+  AS_DICT(nccf_ballast_online);
+  AS_DICT(snip_edges);
+
+  auto torch_device = py::module_::import("torch").attr("device");
+  dict["device"] = torch_device(opts.device.str());
+
+  return dict;
+}
+
+
 MfccOptions MfccOptionsFromDict(py::dict dict) {
   MfccOptions opts;
 
diff --git a/kaldifeat/python/csrc/utils.h b/kaldifeat/python/csrc/utils.h
index 1b30b6e..2426aef 100644
--- a/kaldifeat/python/csrc/utils.h
+++ b/kaldifeat/python/csrc/utils.h
@@ -10,6 +10,7 @@
 #include "kaldifeat/csrc/feature-plp.h"
 #include "kaldifeat/csrc/feature-spectrogram.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/mel-computations.h"
+#include "kaldifeat/csrc/pitch-functions.h"
 #include "kaldifeat/csrc/whisper-fbank.h"
 #include "kaldifeat/python/csrc/kaldifeat.h"
@@ -26,8 +27,11 @@
  * all fields use their default values.
  */
 
 namespace kaldifeat {
 
+// Short alias matching the Python class name `PitchOptions`.
+using PitchOptions = PitchExtractionOptions;
+
 FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
 py::dict AsDict(const FrameExtractionOptions &opts);
 
@@ -40,6 +44,9 @@ py::dict AsDict(const FbankOptions &opts);
 WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
 py::dict AsDict(const WhisperFbankOptions &opts);
 
+PitchOptions PitchOptionsFromDict(py::dict dict);
+py::dict AsDict(const PitchOptions &opts);
+
 MfccOptions MfccOptionsFromDict(py::dict dict);
 py::dict AsDict(const MfccOptions &opts);
 
diff --git a/kaldifeat/python/kaldifeat/__init__.py b/kaldifeat/python/kaldifeat/__init__.py
index 247f4da..4606ec7 100644
--- a/kaldifeat/python/kaldifeat/__init__.py
+++ b/kaldifeat/python/kaldifeat/__init__.py
@@ -15,6 +15,7 @@
     FrameExtractionOptions,
     MelBanksOptions,
     MfccOptions,
+    PitchOptions,
     PlpOptions,
     SpectrogramOptions,
     WhisperFbankOptions,
@@ -25,9 +26,10 @@
 from .mfcc import Mfcc, OnlineMfcc
 from .offline_feature import OfflineFeature
 from .online_feature import OnlineFeature
+from .pitch import Pitch
 from .plp import OnlinePlp, Plp
 from .spectrogram import Spectrogram
 from .whisper_fbank import WhisperFbank
 
 cmake_prefix_path = _Path(__file__).parent / "share" / "cmake"
 del _Path
diff --git a/kaldifeat/python/kaldifeat/pitch.py b/kaldifeat/python/kaldifeat/pitch.py
new file mode 100644
index 0000000..6e4a1fd
--- /dev/null
+++ b/kaldifeat/python/kaldifeat/pitch.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2024 (authors: Feiteng Li)
+
+
+import _kaldifeat
+
+from .offline_feature import OfflineFeature
+
+
+class Pitch(OfflineFeature):
+    """Offline pitch extractor backed by ``_kaldifeat.Pitch``."""
+
+    def __init__(self, opts: _kaldifeat.PitchOptions):
+        """Create the extractor from pitch options.
+
+        Args:
+          opts: Options forwarded to ``OfflineFeature`` and used to build
+            the underlying ``_kaldifeat.Pitch`` computer.
+        """
+        super().__init__(opts)
+        self.computer = _kaldifeat.Pitch(opts)