Skip to content

Commit

Permalink
preparing release 1.20
Browse files Browse the repository at this point in the history
  • Loading branch information
Nassos Katsamanis committed Aug 30, 2013
1 parent bb5f962 commit abc262a
Show file tree
Hide file tree
Showing 4 changed files with 393 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
SailAlign-1.20.tar.gz
Build
MANIFEST.SKIP.bak
MANIFEST.bak
META.json
META.yml
MYMETA.json
MYMETA.yml
_build/
bin/
blib/
61 changes: 61 additions & 0 deletions MANIFEST.SKIP
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

#!start included /usr/lib/perl5/5.10.0/ExtUtils/MANIFEST.SKIP
# Avoid version control files.
\bRCS\b
\bCVS\b
\bSCCS\b
,v$
\B\.svn\b
\b_darcs\b

# Avoid Makemaker generated and utility files.
\bMANIFEST\.bak
\bMakefile$
\bblib/
\bMakeMaker-\d
\bpm_to_blib\.ts$
\bpm_to_blib$
\bblibdirs\.ts$ # 6.18 through 6.25 generated this

# Avoid Module::Build generated and utility files.
\bBuild$
\b_build/

# Avoid temp and backup files.
~$
\.old$
\#$
\b\.#
\.bak$

# Avoid Devel::Cover files.
\bcover_db\b
#!end included /usr/lib/perl5/5.10.0/ExtUtils/MANIFEST.SKIP

# Avoid configuration metadata file
^MYMETA\.

# Avoid Module::Build generated and utility files.
\bBuild$
\bBuild.bat$
\b_build
\bBuild.COM$
\bBUILD.COM$
\bbuild.com$
^MANIFEST\.SKIP

# Avoid archives of this distribution
\bSailAlign-[\d\.\_]+

# Avoid versioning information
\B\.git\b
\B\.gitignore\b

# Avoid local links to binary files
^bin/*

# Avoid local tests and acoustic models
^support/test/local*
^support/resources/*
^models/ac_models/english/htk/*
^config/*
301 changes: 301 additions & 0 deletions config/timit_alignment.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
# Sail Tools configuration file
#
# This file is plain Perl: it assigns the package variables $ROOTPATH and
# $BIN_DIR and then fills in the %cfg hash.
# $WORKINGDIR and $EXPERIMENT_ID are read below but are not assigned anywhere
# in this file, so whatever evaluates it has to define them beforehand.
# NOTE(review): catdir()/catfile() are called throughout, yet the import on
# the next line is commented out -- presumably the code that loads this file
# has already imported File::Spec::Functions into the same package; confirm.
#use File::Spec::Functions;
# Root of the SailAlign installation. This is a machine-specific absolute
# path; every model and language resource below is located relative to it.
$ROOTPATH = "/home/nassos/workspace/SailAlign-1.20";
# <root>/bin; reused below as the bin_dir / bin_path value of each tool section.
$BIN_DIR = catdir( $ROOTPATH, 'bin' );
%cfg = (
experiment_id => $EXPERIMENT_ID,
working_dir => $WORKINGDIR,
features_directory => catdir( $WORKINGDIR, 'features', 'asr', 'htk' ),
bin_dir => $BIN_DIR,
vad_output_dir => 'vad',
alignment => {
do_adaptation => 1,
do_phon_alignment => 1,
do_forced_word_alignment => 1,
use_back_lm => 0,
max_utterance_duration => 10,
segmentation_script_suffix => 'cut',
utterance_list_suffix => 'utt',
wordlist => 'word.list',
dictionary => 'dictionary.dic',
language_model => 'language.model',
format => 'lab',
suffix => 'lab',
min_n_aligned_words => 6,
output_mode => 'words_uncertainties',
acoustic_model => {
defs => catdir(
$ROOTPATH,
'models/ac_models/english/htk/wsj_all_10000_32/hmmdefs'
),
macros => catdir(
$ROOTPATH,
'models/ac_models/english/htk/wsj_all_10000_32/macros'
),
tiedlist => catdir(
$ROOTPATH,
'models/ac_models/english/htk/wsj_all_10000_32/tiedlist'
),
},
word_forced_alignment => {
dir => catdir( $WORKINGDIR, 'word' ),
mlf => catfile( $WORKINGDIR, 'word', 'word_align.mlf' ),
file_list => catfile( $WORKINGDIR, 'word', 'files.list'),
utt_duration => 10,
tool => 'htk',
log_level => 1,
track_model_boundaries => 0,
binary => 'HVite',
bin_path => $BIN_DIR,
prune => '250.0 150.0 1000.0',
label_format => 'SM',
configuration_file =>
catfile( $WORKINGDIR, 'hvite_word_alignment.cfg' ),
triphone_context => 'T',
word_context => 'T',
sen_boundary => '<s>',
oov_symbol => '!!UNK',
},
phon_alignment => {
dir => catdir( $WORKINGDIR, 'phone' ),
mlf => catfile( $WORKINGDIR, 'phone', 'phone_align.mlf' ),
file_list => catfile( $WORKINGDIR, 'phone', 'files.list'),
utt_duration => 10,
tool => 'htk',
log_level => 1,
track_model_boundaries => 1,
binary => 'HVite',
bin_path => $BIN_DIR,
prune => '250.0 150.0 1000.0',
label_format => 'SW',
configuration_file =>
catfile( $WORKINGDIR, 'hvite_alignment.cfg' ),
triphone_context => 'T',
word_context => 'T',
sen_boundary => '<s>',
oov_symbol => '!!UNK',
},
recognition_output => "recognition.mlf",
alignment_accuracy => 0.999,
max_n_iterations => 5,
unaligned_text_filename => "unaligned.txt",
vad => {
method => 'None',
tool => 'vad',
frame_rate => 0.01,
frame_size => 0.02,
model => catdir( $ROOTPATH, "models/vad/MattModel.bin" ),
output => "voice_activity.out",
lab_file => "voice_activity.lab",
voice_labels => [ "VOICE", "SIL" ],
},
feature_extraction => {
feature_file_suffix => 'mfc',
configuration_file => 'feature_extraction.cfg',
tool => 'HCopy',
kind => 'MFCC_0_D_A_Z',
format => 'HTK',
rate => 0.01,
save_compressed => 'T',
save_with_crc => 'T',
window_size => 0.025,
use_hamming => 'T',
preemphasis_factor => 0.97,
n_filters => 26,
cepstral_liftering => 22,
n_cepstral_coefs => 12,
normalize_energy => 'F',
subtract_dc => 'T',
use_power => 'T',
log_level => 1,
},
segmentation => {
tool => 'ch_track',
bin_dir => $BIN_DIR,
format => 'HTK',
cut_file_suffix => 'cut',
segment_list_suffix => 'seg',
},
language_modeling => {
dictionary => {
tool => 'htk',
bin_dir => $BIN_DIR,
log_file_suffix => 'dlog',
reference => [
catfile( $ROOTPATH, 'language', 'cmu_dictionary.dic' ),
catfile( $ROOTPATH, 'language', 'timit_dictionary.dic' )
],
output_symbols_list =>
catfile( $ROOTPATH, 'language', 'output_symbols.list' ),
sp_model => 'sp',
sil_model => 'sil',
apply_phone_map => 1,
phone_map_direct => catfile( $ROOTPATH, 'language', 'timit2cmu_phones.map'),
phone_map_inverse => catfile( $ROOTPATH, 'language', 'cmu2timit_phones.map'),
file => catfile( $WORKINGDIR, 'dictionary' ),
},
text => {
root_path => catdir( $WORKINGDIR, 'text' ),
format => 'txt',
suffix => 'txt',
},
model => {
tool => 'srilm',
options => ['wbdiscount'],
format => 'sri',
bin_path => $BIN_DIR,
binary => 'ngram-count',
merge_binary => 'ngram',
suffix => 'lm',
order => 3,
path => catdir( $WORKINGDIR, 'lm' ),
oov_symbol => "!!UNK",
back_lm_weight => 0.1,
},
wordlist => catfile( $WORKINGDIR, 'wordlists', 'words.list' ),
unknown_wordlist => catfile( $WORKINGDIR, 'wordlists','unknown_words.list' ),
wordlist_suffix => 'wlist',
},
# Acoustic-model adaptation settings. Every file and directory generated for
# this step is placed under $WORKINGDIR/adaptation, except the HVite
# configuration file, which sits directly in $WORKINGDIR.
# NOTE(review): the meaning of each key is defined by the code that consumes
# %cfg, which is not visible here; the comments below describe values only.
adaptation => {
    # Models to start from: path/file/macros/list name the model directory
    # and the files inside it.
    src_acoustic_models => {
        name   => 'wsj_all_10000_32',
        format => 'htk',
        stats  => 'stats_hmm82',
        path   => catdir(
            $ROOTPATH,   'models',
            'ac_models', 'english',
            'htk',       'wsj_all_10000_32'
        ),
        file   => 'hmmdefs',
        macros => 'macros',
        list   => 'tiedlist',
    },
    # Alignment pass run with the HVite binary from $BIN_DIR; its output
    # MLF is written into the adaptation directory.
    alignment => {
        transcription_dir => catdir( $WORKINGDIR, 'adaptation' ),
        mlf => catfile( $WORKINGDIR, 'adaptation', 'adaptation.mlf' ),
        tool                   => 'htk',
        log_level              => 1,
        track_model_boundaries => 1,
        binary                 => 'HVite',
        bin_path               => $BIN_DIR,
        prune                  => '250.0 150.0 1000.0',
        label_format           => 'SWT',
        configuration_file =>
            catfile( $WORKINGDIR, 'hvite_alignment.cfg' ),
        triphone_context => 'T',
        word_context     => 'T',
        sen_boundary     => '<s>',
        oov_symbol       => '!!UNK',
    },
    # Regression class tree built with HHEd; class_dir and base_class
    # carry the same values as the top-level keys of this section.
    regression_class_tree => {
        binary     => 'HHEd',
        log_level  => 1,
        bin_path   => $BIN_DIR,
        n_classes  => 32,
        class_dir  => catdir( $WORKINGDIR, 'adaptation', 'classes' ),
        base_class => 'global',
        hed_file   => catfile( $WORKINGDIR, 'adaptation', 'regtree.hed' ),
    },
    path           => catdir( $WORKINGDIR, 'adaptation' ),
    transkind      => 'MLLRMEAN',
    binary         => 'HERest',
    log_level      => 1,
    class_dir      => catdir( $WORKINGDIR, 'adaptation', 'classes' ),
    base_class     => 'global',
    glob_trans_sfx => 'mllr1',
    rc_trans_sfx   => 'mllr2',
    bin_path       => $BIN_DIR,
    # 'adaptation' is passed as its own path component so that both config
    # files land inside the adaptation directory like everything else in
    # this section. The previous form concatenated the two strings with
    # '.', producing $WORKINGDIR/adaptationconfig.global and
    # $WORKINGDIR/adaptationconfig.rc.
    glob_config_file =>
        catfile( $WORKINGDIR, 'adaptation', 'config.global' ),
    rc_config_file =>
        catfile( $WORKINGDIR, 'adaptation', 'config.rc' ),
    transforms_dir => catdir( $WORKINGDIR, 'adaptation', 'xforms' ),
    transform_name_pattern => '*/%%*.mfc',
    file_list =>
        catfile( $WORKINGDIR, 'adaptation', 'adaptation_files.scp' ),
},
fsg => {
directory => catdir( $WORKINGDIR, 'fsg' ),
sen_boundary_phon => 'sil',
bin_path => $BIN_DIR,
binary => 'HParse',
sen_start => 'SENSTART',
sen_end => 'SENEND',
oov_symbol => '!!UNK',
},
recognition => {
acoustic_models => {
name => 'wsj_all_10000_32',
format => 'htk',
stats => 'stats_hmm82',
path => catdir(
$ROOTPATH, 'models',
'ac_models', 'english',
'htk', 'wsj_all_10000_32'
),
file => 'hmmdefs',
macros => 'macros',
list => 'tiedlist',
phone_set => catfile( $ROOTPATH, 'language', 'phones.list'),
},
alignment => {
transcription_dir => catdir( $WORKINGDIR, 'adaptation' ),
mlf => catfile( $WORKINGDIR, 'adaptation', 'adaptation.mlf' ),
tool => 'htk',
log_level => 1,
binary => 'HVite',
bin_path => $BIN_DIR,
prune => '250.0 150.0 1000.0',
configuration_file =>
catfile( $WORKINGDIR, 'hvite_alignment.cfg' ),
triphone_context => 'T',
word_context => 'T',
sen_boundary => '<s>',
},
lm_conversion => {
do_convert => 0,
binary => 'HBuild',
bin_path => $BIN_DIR,
raw_mit_format => 'T',
configuration_file => 'hbuild.cfg',
log_level => 1,
},
language_model => {
format => 'htk',
utterance_delimiters => [ '<s>', '</s>' ],
oov_symbol => "!!UNK",
suffix => 'htklm',
path => catdir( $WORKINGDIR, 'lm' ),
},
results => {
root_path => catdir( $WORKINGDIR, 'asr' ),
format => 'lab',
suffix => 'rec',
list => 'results.list',
file => catfile( $WORKINGDIR, 'asr', 'results.lab' )
},
log_level => 1,
working_dir => $WORKINGDIR,
filtered_models => [ 'sil', 'sp' ],
filter_transcriptions => 1,
tool => 'htk',
binary => 'HDecode',
bin_path => $BIN_DIR,
prune => '200.0 150.0 1000.0',
insert_pen => '-4.0',
lm_scale => '15.0',
configuration_file => catfile( $WORKINGDIR, 'asr.cfg' ),
triphone_context => 'T',
word_context => 'T',
},
},
text => {
format => 'LAB',
speaker_labels => [ 'H', 'W' ],
voice_labels => [ 'SIL', 'VOICE' ],
word_corrections_map =>
catfile( $ROOTPATH, 'language', 'corrections.map' ),
word_deletions_list =>
catfile( $ROOTPATH, 'language', 'deletions.list' ),
},
);
20 changes: 20 additions & 0 deletions support/patches/audio_wav_read.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Audio::Wav::Read: call Perl's builtin read() explicitly from the generated
reader sub, and comment out the placeholder "sub read" stub.
The builtin namespace is spelled CORE:: (upper case); "Core::read" would be
resolved as a sub named read in a package called Core and fail at run time.

--- Wav/Read.pm 2012-03-31 00:13:15.000000000 +0300
+++ /usr/lib/perl5/site_perl/5.10.0/Audio/Wav/Read.pm 2013-08-30 14:08:14.000000000 +0300
@@ -246,7 +246,7 @@
=cut

# read is generated by _init_read_sub
-sub read { die "ERROR: can't call read without first calling _init_read_sub"; };
+#sub read { die "ERROR: can't call read without first calling _init_read_sub"; };

sub _init_read_sub {
my $self = shift;
@@ -300,7 +300,7 @@
$self -> {read_sub_string} = q[
sub {
my $val;
- $self -> {pos} += read( $handle, $val, $block );
+ $self -> {pos} += CORE::read( $handle, $val, $block );
return unless defined $val;
] . $read_op . q[
};

0 comments on commit abc262a

Please sign in to comment.