Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Impl. pitch functions #91

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ __pycache__/
test-1hour.wav
path.sh
torch_version.py
*.DS_Store
1 change: 1 addition & 0 deletions kaldifeat/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(kaldifeat_srcs
feature-plp.cc
feature-spectrogram.cc
feature-window.cc
pitch-functions.cc
matrix-functions.cc
mel-computations.cc
online-feature.cc
Expand Down
14 changes: 14 additions & 0 deletions kaldifeat/csrc/pitch-functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// kaldifeat/csrc/pitch-functions.cc
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)

// This file is copied/modified from kaldi/src/feat/pitch-functions.cc

#include "kaldifeat/csrc/pitch-functions.h"

#include <cmath>

namespace kaldifeat {


} // namespace kaldifeat
44 changes: 42 additions & 2 deletions kaldifeat/csrc/pitch-functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

#include <string>

#include "torch/script.h"
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"

namespace kaldifeat {

Expand Down Expand Up @@ -144,10 +145,49 @@ struct PitchExtractionOptions {
os << "nccf_ballast_online: " << nccf_ballast_online << "\n";
os << "snip_edges: " << snip_edges << "\n";
os << "device: " << device << "\n";
return os.str();
}
};

// TODO(fangjun): Implement it

// Stream-insertion operator for PitchExtractionOptions.
// NOTE(review): only the declaration is visible here; confirm a definition
// exists in a .cc file before relying on it, otherwise uses will fail to link.
std::ostream &operator<<(std::ostream &os, const PitchExtractionOptions &opts);

// Per-frame pitch computer.
//
// It exposes the interface (Options, Dim, NeedRawLogEnergy, GetFrameOptions,
// GetOptions, Compute) consumed by OfflineFeatureTpl; see the `Pitch` alias
// that instantiates the template with this class.
//
// The object stores its own copy of the options and is non-copyable.
class PitchComputer {
 public:
  using Options = PitchExtractionOptions;

  // Builds a computer configured by `opts`; the options are kept in `opts_`.
  explicit PitchComputer(const PitchExtractionOptions &opts);
  ~PitchComputer();

  // Copying is disabled.
  PitchComputer(const PitchComputer &) = delete;
  PitchComputer &operator=(const PitchComputer &) = delete;

  // Number of values produced per frame.
  // NOTE(review): presumably the two Kaldi pitch outputs (NCCF and pitch) --
  // confirm once Compute() is implemented.
  int32_t Dim() const { return 2; }

  // If true, compute log_energy_pre_window but after dithering and dc removal.
  // NOTE(review): this wording is shared with the other feature computers;
  // confirm it is meaningful for pitch when the definition is written.
  bool NeedRawLogEnergy() const;

  // Frame-extraction options used for windowing.
  //
  // Declared const, like the other accessors of this class, so that it can be
  // called through a const PitchComputer (a const accessor returning a const
  // reference has no reason to require a mutable object).
  const FrameExtractionOptions &GetFrameOptions() const;

  // Options this computer was constructed with.
  const PitchExtractionOptions &GetOptions() const { return opts_; }

  // Computes the feature for `signal_frame`.
  //
  // @param signal_raw_log_energy  Raw log-energy input (see NeedRawLogEnergy).
  // @param vtln_warp              VTLN warp factor.
  // @param signal_frame           Windowed signal to process.
  // @return The computed feature tensor.
  // NOTE(review): tensor shapes are not visible from this declaration --
  // document them next to the definition.
  torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
                        const torch::Tensor &signal_frame);

 private:
  PitchExtractionOptions opts_;
};

// Offline pitch extractor: the generic OfflineFeatureTpl wrapper instantiated
// with PitchComputer.
using Pitch = OfflineFeatureTpl<PitchComputer>;

} // namespace kaldifeat

Expand Down
1 change: 1 addition & 0 deletions kaldifeat/python/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pybind11_add_module(_kaldifeat
feature-plp.cc
feature-spectrogram.cc
feature-window.cc
pitch-functions.cc
kaldifeat.cc
mel-computations.cc
online-feature.cc
Expand Down
133 changes: 133 additions & 0 deletions kaldifeat/python/csrc/pitch-functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// kaldifeat/python/csrc/pitch-functions.cc
//
// Copyright (c) 2024 (authors: Feiteng Li)

#include "kaldifeat/python/csrc/pitch-functions.h"

#include <memory>
#include <string>

#include "kaldifeat/csrc/pitch-functions.h"
#include "kaldifeat/python/csrc/utils.h"

namespace kaldifeat {

// Exposes PitchExtractionOptions to Python under the name "PitchOptions".
//
// The class gets a default constructor, a keyword constructor covering every
// field, read/write attributes for all scalar fields, a "device" property that
// converts to and from torch.device, string/dict conversions, and pickling
// support that round-trips through the dict representation.
static void PybindPitchOptions(py::module &m) {
  using PyClass = PitchExtractionOptions;

  py::class_<PyClass> cls(m, "PitchOptions");

  // Constructors: no-argument form first, then the all-keywords form.
  cls.def(py::init<>());
  cls.def(
      py::init([](float samp_freq = 16000, float frame_shift_ms = 10.0,
                  float frame_length_ms = 25.0, float preemph_coeff = 0.0,
                  float min_f0 = 50, float max_f0 = 400,
                  float soft_min_f0 = 10.0, float penalty_factor = 0.1,
                  float lowpass_cutoff = 1000, float resample_freq = 4000,
                  float delta_pitch = 0.005, float nccf_ballast = 7000,
                  int32_t lowpass_filter_width = 1,
                  int32_t upsample_filter_width = 5,
                  int32_t max_frames_latency = 0, int32_t frames_per_chunk = 0,
                  bool simulate_first_pass_online = false,
                  int32_t recompute_frame = 500,
                  bool nccf_ballast_online = false, bool snip_edges = true,
                  py::object device = py::str("cpu"))
                   -> std::unique_ptr<PitchExtractionOptions> {
        auto result = std::make_unique<PitchExtractionOptions>();

        // Plain field-by-field copy of the scalar arguments.
        result->samp_freq = samp_freq;
        result->frame_shift_ms = frame_shift_ms;
        result->frame_length_ms = frame_length_ms;
        result->preemph_coeff = preemph_coeff;
        result->min_f0 = min_f0;
        result->max_f0 = max_f0;
        result->soft_min_f0 = soft_min_f0;
        result->penalty_factor = penalty_factor;
        result->lowpass_cutoff = lowpass_cutoff;
        result->resample_freq = resample_freq;
        result->delta_pitch = delta_pitch;
        result->nccf_ballast = nccf_ballast;
        result->lowpass_filter_width = lowpass_filter_width;
        result->upsample_filter_width = upsample_filter_width;
        result->max_frames_latency = max_frames_latency;
        result->frames_per_chunk = frames_per_chunk;
        result->simulate_first_pass_online = simulate_first_pass_online;
        result->recompute_frame = recompute_frame;
        result->nccf_ballast_online = nccf_ballast_online;
        result->snip_edges = snip_edges;

        // The device arrives as an arbitrary Python object; it is turned into
        // its string form and parsed by torch::Device.
        std::string device_name = static_cast<py::str>(device);
        result->device = torch::Device(device_name);

        return result;
      }),
      py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0,
      py::arg("frame_length_ms") = 25.0, py::arg("preemph_coeff") = 0.0,
      py::arg("min_f0") = 50, py::arg("max_f0") = 400,
      py::arg("soft_min_f0") = 10.0, py::arg("penalty_factor") = 0.1,
      py::arg("lowpass_cutoff") = 1000, py::arg("resample_freq") = 4000,
      py::arg("delta_pitch") = 0.005, py::arg("nccf_ballast") = 7000,
      py::arg("lowpass_filter_width") = 1,
      py::arg("upsample_filter_width") = 5,
      py::arg("max_frames_latency") = 0, py::arg("frames_per_chunk") = 0,
      py::arg("simulate_first_pass_online") = false,
      py::arg("recompute_frame") = 500,
      py::arg("nccf_ballast_online") = false, py::arg("snip_edges") = true,
      py::arg("device") = py::str("cpu"));

  // Scalar fields are exposed directly as read/write attributes.
  cls.def_readwrite("samp_freq", &PyClass::samp_freq);
  cls.def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms);
  cls.def_readwrite("frame_length_ms", &PyClass::frame_length_ms);
  cls.def_readwrite("preemph_coeff", &PyClass::preemph_coeff);
  cls.def_readwrite("min_f0", &PyClass::min_f0);
  cls.def_readwrite("max_f0", &PyClass::max_f0);
  cls.def_readwrite("soft_min_f0", &PyClass::soft_min_f0);
  cls.def_readwrite("penalty_factor", &PyClass::penalty_factor);
  cls.def_readwrite("lowpass_cutoff", &PyClass::lowpass_cutoff);
  cls.def_readwrite("resample_freq", &PyClass::resample_freq);
  cls.def_readwrite("delta_pitch", &PyClass::delta_pitch);
  cls.def_readwrite("nccf_ballast", &PyClass::nccf_ballast);
  cls.def_readwrite("lowpass_filter_width", &PyClass::lowpass_filter_width);
  cls.def_readwrite("upsample_filter_width", &PyClass::upsample_filter_width);
  cls.def_readwrite("max_frames_latency", &PyClass::max_frames_latency);
  cls.def_readwrite("frames_per_chunk", &PyClass::frames_per_chunk);
  cls.def_readwrite("simulate_first_pass_online",
                    &PyClass::simulate_first_pass_online);
  cls.def_readwrite("recompute_frame", &PyClass::recompute_frame);
  cls.def_readwrite("nccf_ballast_online", &PyClass::nccf_ballast_online);
  cls.def_readwrite("snip_edges", &PyClass::snip_edges);

  // "device" reads back as a torch.device and accepts anything whose string
  // form torch::Device can parse.
  cls.def_property(
      "device",
      [](const PyClass &self) -> py::object {
        py::object torch_device = py::module_::import("torch").attr("device");
        return torch_device(self.device.str());
      },
      [](PyClass &self, py::object obj) -> void {
        std::string device_name = static_cast<py::str>(obj);
        self.device = torch::Device(device_name);
      });

  cls.def("__str__",
          [](const PyClass &self) -> std::string { return self.ToString(); });

  // Dict conversions; pickling reuses the same pair of helpers.
  cls.def("as_dict",
          [](const PyClass &self) -> py::dict { return AsDict(self); });
  cls.def_static("from_dict", [](py::dict dict) -> PyClass {
    return PitchOptionsFromDict(dict);
  });
  cls.def(py::pickle(
      [](const PyClass &self) -> py::dict { return AsDict(self); },
      [](py::dict state) -> PyClass { return PitchOptionsFromDict(state); }));
}

// Exposes the offline pitch extractor to Python under the name "Pitch".
//
// The Python class is constructed from PitchOptions, reports its dimension and
// options, computes features with the GIL released, and pickles itself as the
// dict form of its options.
static void PybindPitch(py::module &m) {
  using PyClass = Pitch;

  py::class_<PyClass> cls(m, "Pitch");

  cls.def(py::init<const PitchOptions &>(), py::arg("opts"));
  cls.def("dim", &PyClass::Dim);
  cls.def_property_readonly("options", &PyClass::GetOptions);

  // The call guard drops the GIL for the duration of the C++ call.
  cls.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
          py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>());

  // Only the options are serialized; unpickling rebuilds the extractor from
  // them.
  cls.def(py::pickle(
      [](const PyClass &pitch) -> py::dict {
        return AsDict(pitch.GetOptions());
      },
      [](py::dict state) -> std::unique_ptr<PyClass> {
        return std::make_unique<PyClass>(PitchOptionsFromDict(state));
      }));
}

// Registers all pitch-related Python bindings on module `m`.
void PybindPitchFunctions(py::module &m) {
// The options class is registered first; the extractor bound next takes it as
// its constructor argument.
PybindPitchOptions(m);
PybindPitch(m);
}

} // namespace kaldifeat
16 changes: 16 additions & 0 deletions kaldifeat/python/csrc/pitch-functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// kaldifeat/python/csrc/pitch-functions.h
//
// Copyright (c) 2024 (authors: Feiteng Li)

#ifndef KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
#define KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_

#include "kaldifeat/python/csrc/kaldifeat.h"

namespace kaldifeat {

// Registers the pitch-related Python bindings (the "PitchOptions" and "Pitch"
// classes) on module `m`.
void PybindPitchFunctions(py::module &m);

} // namespace kaldifeat

#endif // KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
63 changes: 63 additions & 0 deletions kaldifeat/python/csrc/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,69 @@ py::dict AsDict(const WhisperFbankOptions &opts) {
return dict;
}


// Builds a PitchOptions from a Python dict.
//
// Starts from a default-constructed PitchOptions and applies one FROM_DICT
// per field. FROM_DICT is a macro defined elsewhere in this file;
// NOTE(review): presumably it overwrites opts.<key> only when the key is
// present in `dict` -- confirm against the macro definition.
PitchOptions PitchOptionsFromDict(py::dict dict) {
PitchOptions opts;

FROM_DICT(float_, samp_freq);
FROM_DICT(float_, frame_shift_ms);
FROM_DICT(float_, frame_length_ms);
FROM_DICT(float_, preemph_coeff);
FROM_DICT(float_, min_f0);
FROM_DICT(float_, max_f0);
FROM_DICT(float_, soft_min_f0);
FROM_DICT(float_, penalty_factor);
FROM_DICT(float_, lowpass_cutoff);
FROM_DICT(float_, resample_freq);
FROM_DICT(float_, delta_pitch);
FROM_DICT(float_, nccf_ballast);
FROM_DICT(int_, lowpass_filter_width);
FROM_DICT(int_, upsample_filter_width);
FROM_DICT(int_, max_frames_latency);
FROM_DICT(int_, frames_per_chunk);
FROM_DICT(bool_, simulate_first_pass_online);
FROM_DICT(int_, recompute_frame);
FROM_DICT(bool_, nccf_ballast_online);
FROM_DICT(bool_, snip_edges);

// The device is handled by hand: the dict value is converted to its string
// form and parsed by torch::Device. Without a "device" key the default stays.
if (dict.contains("device")) {
opts.device = torch::Device(std::string(py::str(dict["device"])));
}

return opts;
}

// Converts a PitchOptions into a Python dict.
//
// Each field is added through AS_DICT, a macro defined elsewhere in this file;
// NOTE(review): presumably it stores opts.<key> under the key "<key>" --
// confirm against the macro definition.
py::dict AsDict(const PitchOptions &opts) {
py::dict dict;

AS_DICT(samp_freq);
AS_DICT(frame_shift_ms);
AS_DICT(frame_length_ms);
AS_DICT(preemph_coeff);
AS_DICT(min_f0);
AS_DICT(max_f0);
AS_DICT(soft_min_f0);
AS_DICT(penalty_factor);
AS_DICT(lowpass_cutoff);
AS_DICT(resample_freq);
AS_DICT(delta_pitch);
AS_DICT(nccf_ballast);
AS_DICT(lowpass_filter_width);
AS_DICT(upsample_filter_width);
AS_DICT(max_frames_latency);
AS_DICT(frames_per_chunk);
AS_DICT(simulate_first_pass_online);
AS_DICT(recompute_frame);
AS_DICT(nccf_ballast_online);
AS_DICT(snip_edges);

// The device is stored as a Python torch.device object built from the
// device's string form, not as a plain string.
auto torch_device = py::module_::import("torch").attr("device");
dict["device"] = torch_device(opts.device.str());

return dict;
}


MfccOptions MfccOptionsFromDict(py::dict dict) {
MfccOptions opts;

Expand Down
8 changes: 8 additions & 0 deletions kaldifeat/python/csrc/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "kaldifeat/csrc/feature-plp.h"
#include "kaldifeat/csrc/feature-spectrogram.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/pitch-functions.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "kaldifeat/csrc/whisper-fbank.h"
#include "kaldifeat/python/csrc/kaldifeat.h"
Expand All @@ -26,8 +27,12 @@
* all fields use their default values.
*/


namespace kaldifeat {

// Short name for PitchExtractionOptions, used by the dict helpers declared
// below and by the Python bindings.
using PitchOptions = kaldifeat::PitchExtractionOptions;


FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
py::dict AsDict(const FrameExtractionOptions &opts);

Expand All @@ -40,6 +45,9 @@ py::dict AsDict(const FbankOptions &opts);
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
py::dict AsDict(const WhisperFbankOptions &opts);

// Builds a PitchOptions from a Python dict.
PitchOptions PitchOptionsFromDict(py::dict dict);
// Converts a PitchOptions into a Python dict.
py::dict AsDict(const PitchOptions &opts);

MfccOptions MfccOptionsFromDict(py::dict dict);
py::dict AsDict(const MfccOptions &opts);

Expand Down
13 changes: 13 additions & 0 deletions kaldifeat/python/kaldifeat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
FrameExtractionOptions,
MelBanksOptions,
MfccOptions,
PitchOptions,
PlpOptions,
SpectrogramOptions,
WhisperFbankOptions,
Expand All @@ -25,9 +26,21 @@
from .mfcc import Mfcc, OnlineMfcc
from .offline_feature import OfflineFeature
from .online_feature import OnlineFeature
from .pitch import Pitch
from .plp import OnlinePlp, Plp
from .spectrogram import Spectrogram
from .whisper_fbank import WhisperFbank

cmake_prefix_path = _Path(__file__).parent / "share" / "cmake"
del _Path

# Package version string. The original file repeated this identical assignment
# ten times; a single assignment has the same effect.
# NOTE(review): this looks like a line appended by the build; confirm whether
# it should be committed to the repository at all.
__version__ = '1.25.4.dev20240124+cpu.torch2.1.1'
13 changes: 13 additions & 0 deletions kaldifeat/python/kaldifeat/pitch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024 (authors: Feiteng Li)


import _kaldifeat

from .offline_feature import OfflineFeature


class Pitch(OfflineFeature):
    """Offline pitch feature extractor backed by ``_kaldifeat.Pitch``.

    Args:
      opts:
        Pitch extraction options. They are passed to the base class and used
        to construct the underlying ``_kaldifeat.Pitch`` computer.
    """

    def __init__(self, opts: _kaldifeat.PitchOptions):
        super().__init__(opts)
        self.computer = _kaldifeat.Pitch(opts)

Loading