diff --git a/python/visqol_lib_py.cc b/python/visqol_lib_py.cc index 63e45ea..caa0194 100644 --- a/python/visqol_lib_py.cc +++ b/python/visqol_lib_py.cc @@ -25,7 +25,7 @@ PYBIND11_MODULE(visqol_lib_py, m) { .def(pybind11::init<>()) .def("Init", pybind11::overload_cast(&Visqol::VisqolManager::Init)) + bool, bool, bool>(&Visqol::VisqolManager::Init)) .def("Run", pybind11::overload_cast( &Visqol::VisqolManager::Run)); diff --git a/python/visqol_lib_py_test.py b/python/visqol_lib_py_test.py index a2cb0d5..3b4d0aa 100644 --- a/python/visqol_lib_py_test.py +++ b/python/visqol_lib_py_test.py @@ -29,7 +29,7 @@ def _calculate_visqol(reference_file, degraded_file): ref_path = visqol_lib_py.FilePath(os.path.join(files_dir, reference_file)) deg_path = visqol_lib_py.FilePath(os.path.join(files_dir, degraded_file)) manager = visqol_lib_py.VisqolManager() - manager.Init(model_path, True, False, 60, True) + manager.Init(model_path, True, False, 60, True, False, False) similarity_result = manager.Run(ref_path, deg_path) return similarity_result diff --git a/src/commandline_parser.cc b/src/commandline_parser.cc index 3429fd8..9daa875 100644 --- a/src/commandline_parser.cc +++ b/src/commandline_parser.cc @@ -86,6 +86,8 @@ ABSL_FLAG(int, search_window_radius, 60, "search to discover patch matches. For a given reference frame, it " "will look at 2*search_window_radius + 1 patches to find the most " "optimal match."); +ABSL_FLAG(bool, disable_global_alignment, false, "Disables global alignment"); +ABSL_FLAG(bool, disable_realignment, false, "Disables realignment"); namespace Visqol { ABSL_CONST_INIT const char kDefaultAudioModelFile[] = @@ -113,6 +115,8 @@ absl::StatusOr VisqolCommandLineParser::Parse(int argc, bool use_lattice_model; bool use_unscaled_mapping; int search_window; + bool disable_global_alignment; + bool disable_realignment; batch_input = FilePath(absl::GetFlag(FLAGS_batch_input_csv)); if (!batch_input.Path().empty()) { @@ -132,6 +136,8 @@ absl::StatusOr VisqolCommandLineParser::Parse(int argc, verbose = absl::GetFlag(FLAGS_verbose); search_window = absl::GetFlag(FLAGS_search_window_radius); debug_output = FilePath(absl::GetFlag(FLAGS_output_debug)); + disable_global_alignment = absl::GetFlag(FLAGS_disable_global_alignment); + disable_realignment = absl::GetFlag(FLAGS_disable_realignment); similarity_to_quality_model = FilePath(absl::GetFlag(FLAGS_similarity_to_quality_model)); @@ -175,7 +181,9 @@ absl::StatusOr VisqolCommandLineParser::Parse(int argc, .use_speech_mode = use_speech, .use_unscaled_speech_mos_mapping = use_unscaled_mapping, .search_window_radius = search_window, - .use_lattice_model = use_lattice_model}; + .use_lattice_model = use_lattice_model, + .disable_global_alignment = disable_global_alignment, + .disable_realignment = disable_realignment}; } std::vector diff --git a/src/include/commandline_parser.h b/src/include/commandline_parser.h index 642ada6..7c73a88 100644 --- a/src/include/commandline_parser.h +++ b/src/include/commandline_parser.h @@ -106,6 +106,16 @@ struct CommandLineArgs { * If true, use a lattice model to map similarity to MOS. */ bool use_lattice_model = true; + + /** + * If true, disables global alignment. + **/ + bool disable_global_alignment; + + /** + * If true, disables patch-wise realignment. + **/ + bool disable_realignment; }; /** diff --git a/src/include/visqol.h b/src/include/visqol.h index 83de006..affc277 100644 --- a/src/include/visqol.h +++ b/src/include/visqol.h @@ -57,6 +57,7 @@ class Visqol { * score. * @param search_window This parameter is used to determine how far the * algorithm will search in order to find the most optimal match. + * @param disable_realignment Disables refined patch realignment * * @return If the comparison was successful, return the similarity result and * associated debug info. Else, return an error status. @@ -67,7 +68,7 @@ class Visqol { const ImagePatchCreator* patch_creator, const ComparisonPatchesSelector* comparison_patches_selector, const SimilarityToQualityMapper* sim_to_qual_mapper, - const int search_window) const; + const int search_window, const bool disable_realignment) const; /** * Produces a set of FVNSIM scores, which represent the similarity between diff --git a/src/include/visqol_manager.h b/src/include/visqol_manager.h index 8a0772e..e389f99 100644 --- a/src/include/visqol_manager.h +++ b/src/include/visqol_manager.h @@ -92,12 +92,16 @@ class VisqolManager { * a given reference patch. * @param use_lattice_model If true, use a lattice model for mapping * similarity to quality. + * @param disable_global_alignment Disables global alignment + * @param disable_realignment Disables refined patch realignment * * @return An 'OK' status if initialised successfully, else an error status. */ absl::Status Init(const FilePath& similarity_to_quality_mapper_model, bool use_speech_mode, bool use_unscaled_speech, - int search_window, bool use_lattice_model = true); + int search_window, bool use_lattice_model = true, + bool disable_global_alignment = false, + bool disable_realignment = false); /** * Initializes an instance for use with the given similarity to quality @@ -114,12 +118,16 @@ class VisqolManager { * a given reference patch. * @param use_lattice_model If true, use a lattice model for mapping * similarity to quality. + * @param disable_global_alignment Disables global alignment + * @param disable_realignment Disables refined patch realignment * * @return An 'OK' status if initialised successfully, else an error status. */ absl::Status Init(absl::string_view similarity_to_quality_mapper_model_string, bool use_speech_mode, bool use_unscaled_speech, - int search_window, bool use_lattice_model = true); + int search_window, bool use_lattice_model = true, + bool disable_global_alignment = false, + bool disable_realignment = false); /** * Perform a comparison on a single reference/degraded audio file pair. @@ -174,6 +182,16 @@ class VisqolManager { */ int search_window_ = 60; + /** + * True if global realignment step should be skipped. + */ + bool disable_global_alignment_ = false; + + /** + * True if per-patch realignment is disabled. + */ + bool disable_realignment_ = false; + /** * Used for creating the patches from both the reference and degraded signals * for comparison. diff --git a/src/main.cc b/src/main.cc index 4b37186..3adf107 100644 --- a/src/main.cc +++ b/src/main.cc @@ -35,7 +35,8 @@ int main(int argc, char** argv) { auto init_status = visqol.Init( cmd_args.similarity_to_quality_mapper_model, cmd_args.use_speech_mode, cmd_args.use_unscaled_speech_mos_mapping, cmd_args.search_window_radius, - cmd_args.use_lattice_model); + cmd_args.use_lattice_model, cmd_args.disable_global_alignment, + cmd_args.disable_realignment); if (!init_status.ok()) { ABSL_RAW_LOG(ERROR, "%s", init_status.ToString().c_str()); return -1; diff --git a/src/visqol.cc b/src/visqol.cc index 6bbbcf8..b42854a 100644 --- a/src/visqol.cc +++ b/src/visqol.cc @@ -39,7 +39,8 @@ absl::StatusOr Visqol::CalculateSimilarity( const ImagePatchCreator* patch_creator, const ComparisonPatchesSelector* comparison_patches_selector, const SimilarityToQualityMapper* sim_to_qual_mapper, - const int search_window) const { + const int search_window, + const bool disable_realignment) const { /////////////////// Stage 1: Preprocessing /////////////////// deg_signal = MiscAudio::ScaleToMatchSoundPressureLevel(ref_signal, deg_signal); @@ -89,14 +90,18 @@ absl::StatusOr Visqol::CalculateSimilarity( // Realign the patches in time domain subsignals that start at the coarse // patch times. - auto realign_result = - comparison_patches_selector->FinelyAlignAndRecreatePatches( - sim_match_info, ref_signal, deg_signal, spect_builder, window); - if (!realign_result.ok()) { - return realign_result.status(); - } + if (disable_realignment) { + sim_match_info = most_sim_patch_result.value(); + } else { + auto realign_result = + comparison_patches_selector->FinelyAlignAndRecreatePatches( + sim_match_info, ref_signal, deg_signal, spect_builder, window); + if (!realign_result.ok()) { + return realign_result.status(); + } - sim_match_info = realign_result.value(); + sim_match_info = realign_result.value(); + } AMatrix fvnsim = CalcPerPatchMeanFreqBandMeans(sim_match_info); AMatrix fvnsim10 = CalcPerPatchFreqBandQuantile(sim_match_info, 0.10); diff --git a/src/visqol_manager.cc b/src/visqol_manager.cc index 806d6c2..08627b1 100644 --- a/src/visqol_manager.cc +++ b/src/visqol_manager.cc @@ -51,11 +51,14 @@ const double VisqolManager::kDurationMismatchTolerance = 1.0; absl::Status VisqolManager::Init( const FilePath& similarity_to_quality_mapper_model, bool use_speech_mode, - bool use_unscaled_speech, int search_window, bool use_lattice_model) { + bool use_unscaled_speech, int search_window, bool use_lattice_model, + bool disable_global_alignment, bool disable_realignment) { use_speech_mode_ = use_speech_mode; use_unscaled_speech_mos_mapping_ = use_unscaled_speech; search_window_ = search_window; use_lattice_model_ = use_lattice_model; + disable_global_alignment_ = disable_global_alignment; + disable_realignment_ = disable_realignment; InitPatchCreator(); InitPatchSelector(); @@ -75,10 +78,12 @@ absl::Status VisqolManager::Init( absl::Status VisqolManager::Init( absl::string_view similarity_to_quality_mapper_model_string, bool use_speech_mode, bool use_unscaled_speech, int search_window, - bool use_lattice_model) { + bool use_lattice_model, bool disable_global_alignment, + bool disable_realignment) { return Init(FilePath(similarity_to_quality_mapper_model_string), use_speech_mode, use_unscaled_speech, search_window, - use_lattice_model); + use_lattice_model, disable_global_alignment, + disable_realignment); } void VisqolManager::InitPatchCreator() { @@ -154,9 +159,16 @@ absl::StatusOr VisqolManager::Run( VISQOL_RETURN_IF_ERROR(ValidateInputAudio(ref_signal, deg_signal)); - // Adjust for codec initial padding. - auto alignment_result = Alignment::GloballyAlign(ref_signal, deg_signal); - deg_signal = std::get<0>(alignment_result); + std::tuple alignment_result; + if (!disable_global_alignment_) { + // Adjust for codec initial padding. + alignment_result = Alignment::GloballyAlign(ref_signal, deg_signal); + deg_signal = std::get<0>(alignment_result); + } + else { + // If no alignment is performed, lag should be set to 0 + alignment_result = std::make_tuple(deg_signal, 0.0); + } const AnalysisWindow window{ref_signal.sample_rate, kOverlap}; @@ -168,7 +180,7 @@ absl::StatusOr VisqolManager::Run( sim_result, visqol.CalculateSimilarity( ref_signal, deg_signal, spectrogram_builder_.get(), window, patch_creator_.get(), patch_selector_.get(), - sim_to_qual_.get(), search_window_)); + sim_to_qual_.get(), search_window_, disable_realignment_)); SimilarityResultMsg sim_result_msg = PopulateSimResultMsg(sim_result); sim_result_msg.set_alignment_lag_s(std::get<1>(alignment_result)); return sim_result_msg;