diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c8f69bc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+SailAlign-1.20.tar.gz
+Build
+MANIFEST.SKIP.bak
+MANIFEST.bak
+META.json
+META.yml
+MYMETA.json
+MYMETA.yml
+_build/
+bin/
+blib/
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 0000000..b78c745
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1,61 @@
+
+#!start included /usr/lib/perl5/5.10.0/ExtUtils/MANIFEST.SKIP
+# Avoid version control files.
+\bRCS\b
+\bCVS\b
+\bSCCS\b
+,v$
+\B\.svn\b
+\b_darcs\b
+
+# Avoid Makemaker generated and utility files.
+\bMANIFEST\.bak
+\bMakefile$
+\bblib/
+\bMakeMaker-\d
+\bpm_to_blib\.ts$
+\bpm_to_blib$
+\bblibdirs\.ts$         # 6.18 through 6.25 generated this
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\b_build/
+
+# Avoid temp and backup files.
+~$
+\.old$
+\#$
+\b\.#
+\.bak$
+
+# Avoid Devel::Cover files.
+\bcover_db\b
+#!end included /usr/lib/perl5/5.10.0/ExtUtils/MANIFEST.SKIP
+
+# Avoid configuration metadata file
+^MYMETA\.
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\bBuild.bat$
+\b_build
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+^MANIFEST\.SKIP
+
+# Avoid archives of this distribution
+\bSailAlign-[\d\.\_]+
+
+# Avoid versioning information
+\B\.git\b
+\B\.gitignore\b
+
+# Avoid local links to binary files (entries are regexes, not globs)
+^bin/
+
+# Avoid local tests and acoustic models (entries are regexes, not globs)
+^support/test/local
+^support/resources/
+^models/ac_models/english/htk/
+^config/
diff --git a/config/timit_alignment.cfg b/config/timit_alignment.cfg
new file mode 100644
index 0000000..cceb541
--- /dev/null
+++ b/config/timit_alignment.cfg
@@ -0,0 +1,301 @@
+# Sail Tools configuration file (TIMIT alignment); $WORKINGDIR, $EXPERIMENT_ID, catdir() and catfile() must be supplied by the code that loads it.
+#use File::Spec::Functions;
+$ROOTPATH = "/home/nassos/workspace/SailAlign-1.20";    # machine-specific install root; edit per host
+$BIN_DIR = catdir( $ROOTPATH, 'bin' );
+%cfg = (
+    experiment_id      => $EXPERIMENT_ID,
+    working_dir        => $WORKINGDIR,
+    features_directory => catdir( $WORKINGDIR, 'features', 'asr', 'htk' ),
+    bin_dir            => $BIN_DIR,
+    vad_output_dir     => 'vad',
+    alignment          => {
+        do_adaptation              => 1,
+        do_phon_alignment          => 1,
+        do_forced_word_alignment   => 1,
+        use_back_lm                => 0,
+        max_utterance_duration     => 10,
+        segmentation_script_suffix => 'cut',
+        utterance_list_suffix      => 'utt',
+        wordlist                   => 'word.list',
+        dictionary                 => 'dictionary.dic',
+        language_model             => 'language.model',
+        format                     => 'lab',
+        suffix                     => 'lab',
+        min_n_aligned_words        => 6,
+        output_mode                => 'words_uncertainties',
+        acoustic_model             => {
+            defs => catfile(    # hmmdefs, macros and tiedlist are files, hence catfile
+                $ROOTPATH,
+                'models/ac_models/english/htk/wsj_all_10000_32/hmmdefs'
+            ),
+            macros => catfile(
+                $ROOTPATH,
+                'models/ac_models/english/htk/wsj_all_10000_32/macros'
+            ),
+            tiedlist => catfile(
+                $ROOTPATH,
+                'models/ac_models/english/htk/wsj_all_10000_32/tiedlist'
+            ),
+        },
+        word_forced_alignment => {
+            dir                    => catdir( $WORKINGDIR, 'word' ),
+            mlf                    => catfile( $WORKINGDIR, 'word', 'word_align.mlf' ),
+            file_list              => catfile( $WORKINGDIR, 'word', 'files.list'),
+            utt_duration           => 10,
+            tool                   => 'htk',
+            log_level              => 1,
+            track_model_boundaries => 0,
+            binary                 => 'HVite',
+            bin_path               => $BIN_DIR,
+            prune                  => '250.0 150.0 1000.0',
+            label_format           => 'SM',
+            configuration_file     =>
+              catfile( $WORKINGDIR, 'hvite_word_alignment.cfg' ),
+            triphone_context       => 'T',
+            word_context           => 'T',
+            sen_boundary           => '',
+            oov_symbol             => '!!UNK',
+        },
+        phon_alignment => {
+            dir                    => catdir( $WORKINGDIR, 'phone' ),
+            mlf                    => catfile( $WORKINGDIR, 'phone', 'phone_align.mlf' ),
+            file_list              => catfile( $WORKINGDIR, 'phone', 'files.list'),
+            utt_duration           => 10,
+            tool                   => 'htk',
+            log_level              => 1,
+            track_model_boundaries => 1,
+            binary                 => 'HVite',
+            bin_path               => $BIN_DIR,
+            prune                  => '250.0 150.0 1000.0',
+            label_format           => 'SW',
+            configuration_file     =>
+              catfile( $WORKINGDIR, 'hvite_alignment.cfg' ),
+            triphone_context       => 'T',
+            word_context           => 'T',
+            sen_boundary           => '',
+            oov_symbol             => '!!UNK',
+        },
+        recognition_output      => "recognition.mlf",
+        alignment_accuracy      => 0.999,
+        max_n_iterations        => 5,
+        unaligned_text_filename => "unaligned.txt",
+        vad                     => {
+            method       => 'None',
+            tool         => 'vad',
+            frame_rate   => 0.01,
+            frame_size   => 0.02,
+            model        => catfile( $ROOTPATH, "models/vad/MattModel.bin" ),
+            output       => "voice_activity.out",
+            lab_file     => "voice_activity.lab",
+            voice_labels => [ "VOICE", "SIL" ],
+        },
+        feature_extraction => {
+            feature_file_suffix => 'mfc',
+            configuration_file  => 'feature_extraction.cfg',
+            tool                => 'HCopy',
+            kind                => 'MFCC_0_D_A_Z',
+            format              => 'HTK',
+            rate                => 0.01,
+            save_compressed     => 'T',
+            save_with_crc       => 'T',
+            window_size         => 0.025,
+            use_hamming         => 'T',
+            preemphasis_factor  => 0.97,
+            n_filters           => 26,
+            cepstral_liftering  => 22,
+            n_cepstral_coefs    => 12,
+            normalize_energy    => 'F',
+            subtract_dc         => 'T',
+            use_power           => 'T',
+            log_level           => 1,
+        },
+        segmentation => {
+            tool                => 'ch_track',
+            bin_dir             => $BIN_DIR,
+            format              => 'HTK',
+            cut_file_suffix     => 'cut',
+            segment_list_suffix => 'seg',
+        },
+        language_modeling => {
+            dictionary => {
+                tool            => 'htk',
+                bin_dir         => $BIN_DIR,
+                log_file_suffix => 'dlog',
+                reference       => [
+                    catfile( $ROOTPATH, 'language', 'cmu_dictionary.dic' ),
+                    catfile( $ROOTPATH, 'language', 'timit_dictionary.dic' )
+                ],
+                output_symbols_list =>
+                  catfile( $ROOTPATH, 'language', 'output_symbols.list' ),
+                sp_model          => 'sp',
+                sil_model         => 'sil',
+                apply_phone_map   => 1,
+                phone_map_direct  => catfile( $ROOTPATH, 'language', 'timit2cmu_phones.map'),
+                phone_map_inverse => catfile( $ROOTPATH, 'language', 'cmu2timit_phones.map'),
+                file              => catfile( $WORKINGDIR, 'dictionary' ),
+            },
+            text => {
+                root_path => catdir( $WORKINGDIR, 'text' ),
+                format    => 'txt',
+                suffix    => 'txt',
+            },
+            model => {
+                tool           => 'srilm',
+                options        => ['wbdiscount'],
+                format         => 'sri',
+                bin_path       => $BIN_DIR,
+                binary         => 'ngram-count',
+                merge_binary   => 'ngram',
+                suffix         => 'lm',
+                order          => 3,
+                path           => catdir( $WORKINGDIR, 'lm' ),
+                oov_symbol     => "!!UNK",
+                back_lm_weight => 0.1,
+            },
+            wordlist         => catfile( $WORKINGDIR, 'wordlists', 'words.list' ),
+            unknown_wordlist => catfile( $WORKINGDIR, 'wordlists','unknown_words.list' ),
+            wordlist_suffix  => 'wlist',
+        },
+        adaptation => {
+            src_acoustic_models => {
+                name   => 'wsj_all_10000_32',
+                format => 'htk',
+                stats  => 'stats_hmm82',
+                path   => catdir(
+                    $ROOTPATH,   'models',
+                    'ac_models', 'english',
+                    'htk',       'wsj_all_10000_32'
+                ),
+                file   => 'hmmdefs',
+                macros => 'macros',
+                list   => 'tiedlist',
+            },
+            alignment => {
+                transcription_dir      => catdir( $WORKINGDIR, 'adaptation' ),
+                mlf                    => catfile( $WORKINGDIR, 'adaptation', 'adaptation.mlf' ),
+                tool                   => 'htk',
+                log_level              => 1,
+                track_model_boundaries => 1,
+                binary                 => 'HVite',
+                bin_path               => $BIN_DIR,
+                prune                  => '250.0 150.0 1000.0',
+                label_format           => 'SWT',
+                configuration_file     =>
+                  catfile( $WORKINGDIR, 'hvite_alignment.cfg' ),
+                triphone_context       => 'T',
+                word_context           => 'T',
+                sen_boundary           => '',
+                oov_symbol             => '!!UNK',
+            },
+            regression_class_tree => {
+                binary     => 'HHEd',
+                log_level  => 1,
+                bin_path   => $BIN_DIR,
+                n_classes  => 32,
+                class_dir  => catdir( $WORKINGDIR, 'adaptation', 'classes' ),
+                base_class => 'global',
+                hed_file   => catfile( $WORKINGDIR, 'adaptation', 'regtree.hed' ),
+            },
+            path             => catdir( $WORKINGDIR, 'adaptation' ),
+            transkind        => 'MLLRMEAN',
+            binary           => 'HERest',
+            log_level        => 1,
+            class_dir        => catdir( $WORKINGDIR, 'adaptation', 'classes' ),
+            base_class       => 'global',
+            glob_trans_sfx   => 'mllr1',
+            rc_trans_sfx     => 'mllr2',
+            bin_path         => $BIN_DIR,
+            glob_config_file =>
+              catfile( $WORKINGDIR, 'adaptation', 'config.global' ),    # separate path components, not string concatenation
+            rc_config_file =>
+              catfile( $WORKINGDIR, 'adaptation', 'config.rc' ),        # kept inside the adaptation directory like its siblings
+            transforms_dir         => catdir( $WORKINGDIR, 'adaptation', 'xforms' ),
+            transform_name_pattern => '*/%%*.mfc',
+            file_list              =>
+              catfile( $WORKINGDIR, 'adaptation', 'adaptation_files.scp' ),
+        },
+        fsg => {
+            directory         => catdir( $WORKINGDIR, 'fsg' ),
+            sen_boundary_phon => 'sil',
+            bin_path          => $BIN_DIR,
+            binary            => 'HParse',
+            sen_start         => 'SENSTART',
+            sen_end           => 'SENEND',
+            oov_symbol        => '!!UNK',
+        },
+        recognition => {
+            acoustic_models => {
+                name   => 'wsj_all_10000_32',
+                format => 'htk',
+                stats  => 'stats_hmm82',
+                path   => catdir(
+                    $ROOTPATH,   'models',
+                    'ac_models', 'english',
+                    'htk',       'wsj_all_10000_32'
+                ),
+                file      => 'hmmdefs',
+                macros    => 'macros',
+                list      => 'tiedlist',
+                phone_set => catfile( $ROOTPATH, 'language', 'phones.list'),
+            },
+            alignment => {
+                transcription_dir  => catdir( $WORKINGDIR, 'adaptation' ),
+                mlf                => catfile( $WORKINGDIR, 'adaptation', 'adaptation.mlf' ),
+                tool               => 'htk',
+                log_level          => 1,
+                binary             => 'HVite',
+                bin_path           => $BIN_DIR,
+                prune              => '250.0 150.0 1000.0',
+                configuration_file =>
+                  catfile( $WORKINGDIR, 'hvite_alignment.cfg' ),
+                triphone_context   => 'T',
+                word_context       => 'T',
+                sen_boundary       => '',
+            },
+            lm_conversion => {
+                do_convert         => 0,
+                binary             => 'HBuild',
+                bin_path           => $BIN_DIR,
+                raw_mit_format     => 'T',
+                configuration_file => 'hbuild.cfg',
+                log_level          => 1,
+            },
+            language_model => {
+                format               => 'htk',
+                utterance_delimiters => [ '', '' ],
+                oov_symbol           => "!!UNK",
+                suffix               => 'htklm',
+                path                 => catdir( $WORKINGDIR, 'lm' ),
+            },
+            results => {
+                root_path => catdir( $WORKINGDIR, 'asr' ),
+                format    => 'lab',
+                suffix    => 'rec',
+                list      => 'results.list',
+                file      => catfile( $WORKINGDIR, 'asr', 'results.lab' )
+            },
+            log_level             => 1,
+            working_dir           => $WORKINGDIR,
+            filtered_models       => [ 'sil', 'sp' ],
+            filter_transcriptions => 1,
+            tool                  => 'htk',
+            binary                => 'HDecode',
+            bin_path              => $BIN_DIR,
+            prune                 => '200.0 150.0 1000.0',
+            insert_pen            => '-4.0',
+            lm_scale              => '15.0',
+            configuration_file    => catfile( $WORKINGDIR, 'asr.cfg' ),
+            triphone_context      => 'T',
+            word_context          => 'T',
+        },
+    },
+    text => {
+        format               => 'LAB',
+        speaker_labels       => [ 'H', 'W' ],
+        voice_labels         => [ 'SIL', 'VOICE' ],
+        word_corrections_map =>
+          catfile( $ROOTPATH, 'language', 'corrections.map' ),
+        word_deletions_list =>
+          catfile( $ROOTPATH, 'language', 'deletions.list' ),
+    },
+);
diff --git a/support/patches/audio_wav_read.patch b/support/patches/audio_wav_read.patch
new file mode 100644
index 0000000..edc9a8d
--- /dev/null
+++ b/support/patches/audio_wav_read.patch
@@ -0,0 +1,20 @@
+--- Wav/Read.pm 2012-03-31 00:13:15.000000000 +0300
++++ /usr/lib/perl5/site_perl/5.10.0/Audio/Wav/Read.pm 2013-08-30 14:08:14.000000000 +0300
+@@ -246,7 +246,7 @@
+ =cut
+ 
+ # read is generated by _init_read_sub
+-sub read { die "ERROR: can't call read without first calling _init_read_sub"; };
++#sub read { die "ERROR: can't call read without first calling _init_read_sub"; };
+ 
+ sub _init_read_sub {
+     my $self = shift;
+@@ -300,7 +300,7 @@
+     $self -> {read_sub_string} = q[
+         sub {
+             my $val;
+-            $self -> {pos} += read( $handle, $val, $block );
++            $self -> {pos} += CORE::read( $handle, $val, $block );
+             return unless defined $val;
+             ] . $read_op . q[
+         };