Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
JoFrhwld committed Feb 19, 2015
2 parents 0c7f71c + 5ebbbc1 commit 049aa62
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 66 deletions.
36 changes: 31 additions & 5 deletions FAVE-align/FAAValign.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
-n, --noprompt ("no prompt"):
User is not prompted for the transcription of words not in the dictionary, or truncated words. Unknown words are ignored by the aligner.
-t HTKTOOLSPATH, --htktoolspath=HTKTOOLSPATH
Specifies the path to the HTKTools directory where the HTK executable files are located. If not specified, the user's path will be searched for the location of the executable.
"""

Expand All @@ -77,6 +80,7 @@
import traceback
import codecs
import subprocess
import string

truncated = re.compile(r'\w+\-$') ## truncated words
intended = re.compile(r'^\+\w+') ## intended word (inserted by transcribers after truncated word)
Expand Down Expand Up @@ -503,6 +507,23 @@ def check_transcription(w):

return final_trans

def replace_smart_quotes(all_input):
    """Return the lines of all_input with 'smart' quotes made ASCII.

    Typographic single quotes (U+2018..U+201B) become "'" and typographic
    double quotes (U+201C..U+201F) become '"'.  Without this substitution,
    words containing such quotes would be excluded as out-of-vocabulary
    with respect to the CMU pronouncing dictionary.

    all_input -- iterable of text lines (already decoded to unicode)

    Returns a new list of cleaned lines; the input is not modified.

    WARNING: this function is currently only applied to UTF-8 input.
    """
    # (smart quote, ASCII replacement) pairs, applied in order to each line
    replacements = (
        (u'\u2018', "'"),
        (u'\u2019', "'"),
        (u'\u201a', "'"),
        (u'\u201b', "'"),
        (u'\u201c', '"'),
        (u'\u201d', '"'),
        (u'\u201e', '"'),
        (u'\u201f', '"'),
    )
    cleaned_lines = []
    for line in all_input:
        for smart, plain in replacements:
            line = line.replace(smart, plain)
        cleaned_lines.append(line)
    return cleaned_lines

def check_transcription_file(all_input):
"""checks the format of the input transcription file and returns a list of empty lines to be deleted from the input"""
Expand Down Expand Up @@ -713,13 +734,15 @@ def define_options_and_arguments():
verbose_help = """Detailed output on status of dictionary check and alignment progress."""
dict_help = """Specifies the name of the file containing the pronunciation dictionary. Default file is "/model/dict"."""
noprompt_help = """User is not prompted for the transcription of words not in the dictionary, or truncated words. Unknown words are ignored by the aligner."""
htktoolspath_help = """Specifies the path to the HTKTools directory where the HTK executable files are located. If not specified, the user's path will be searched for the location of the executable."""

parser = optparse.OptionParser(usage=new_use, description=new_desc, epilog=new_ep, version=vers)
parser.add_option('-c', '--check', help=check_help, metavar='FILENAME') ## required argument FILENAME
parser.add_option('-i', '--import', help=import_help, metavar='FILENAME', dest='importfile') ## required argument FILENAME
parser.add_option('-v', '--verbose', action='store_true', default=False, help=verbose_help)
parser.add_option('-d', '--dict', default='model/dict', help=dict_help, metavar='FILENAME')
parser.add_option('-n', '--noprompt', action='store_true', default=False, help=noprompt_help)
parser.add_option('-t', '--htktoolspath', default='', help=htktoolspath_help, metavar='HTKTOOLSPATH')

## After parsing with (options, args) = parser.parse_args(), options are accessible via
## - string options.check (default: None)
Expand Down Expand Up @@ -1150,22 +1173,23 @@ def read_transcription_file(trsfile):

try: ## try UTF-16 encoding first
t = codecs.open(trsfile, 'rU', encoding='utf-16')
lines = t.readlines()
print "Encoding is UTF-16!"
lines = t.readlines()
except UnicodeError:
try: ## then UTF-8...
t = codecs.open(trsfile, 'rU', encoding='utf-8')
lines = t.readlines()
print "Encoding is UTF-8!"
lines = t.readlines()
lines = replace_smart_quotes(lines)
except UnicodeError:
try: ## then Windows encoding...
t = codecs.open(trsfile, 'rU', encoding='windows-1252')
lines = t.readlines()
print "Encoding is Windows-1252!"
lines = t.readlines()
except UnicodeError:
t = open(trsfile, 'rU')
lines = t.readlines()
print "Encoding is ASCII!"
lines = t.readlines()

return lines

Expand Down Expand Up @@ -1454,7 +1478,7 @@ def write_words(out, unknown):
################################################################################


def FAAValign(opts, args, FADIR='', SOXPATH='', HTKTOOLSPATH=''):
def FAAValign(opts, args, FADIR='', SOXPATH=''):
"""runs the forced aligner for the arguments given"""

tempdir = os.path.join(FADIR, TEMPDIR)
Expand Down Expand Up @@ -1501,6 +1525,8 @@ def FAAValign(opts, args, FADIR='', SOXPATH='', HTKTOOLSPATH=''):
style_tier = None
failed_alignment = []

HTKTOOLSPATH = options.htktoolspath

## check correct format of input file; get list of transcription lines
## (this function skips empty annotation units -> lines to be deleted)
if options.verbose:
Expand Down
131 changes: 75 additions & 56 deletions FAVE-extract/bin/extractFormants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,9 +1339,9 @@ def outputFormantSettings(measurements, speaker, outputFile):
count = {}
for code in plotnik.PLOTNIKCODES:
for nf in range(3, 7):
count[(int(code), nf)] = 0
count[(str(code), nf)] = 0
for vm in measurements:
count[(int(vm.cd), int(vm.nFormants))] += 1
count[(str(vm.cd), int(vm.nFormants))] += 1

# filename = name of the output file, but with extension "nFormants"
outfilename = os.path.splitext(outputFile)[0] + ".nFormants"
Expand All @@ -1355,7 +1355,7 @@ def outputFormantSettings(measurements, speaker, outputFile):
for code in plotnik.PLOTNIKCODES:
f.write(code)
for nf in range(3, 7):
f.write('\t' + str(count[(int(code), nf)]))
f.write('\t' + str(count[(str(code), nf)]))
f.write('\n')
f.close()

Expand Down Expand Up @@ -1601,61 +1601,80 @@ def predictF1F2(phone, selectedpoles, selectedbandwidths, means, covs):
distances = []
# this list keeps track of the corresponding value of the Mahalanobis distance
# for all values of nFormants:
for poles, bandwidths in zip(selectedpoles, selectedbandwidths):
# check that there are at least two formants in the selected frame
if len(poles) >= 2:
# nPoles = len(poles) ## number of poles
# check all possible combinations of F1, F2, F3:
# for i in range(min([nPoles - 1, 2])):
# for j in range(i+1, min([nPoles, 3])):
i = 0
j = 1
# vector with current pole combination and associated
# bandwidths
x = np.array([poles[i], poles[j], math.log(bandwidths[i]), math.log(bandwidths[j])])
# calculate Mahalanobis distance between x and ANAE mean
dist = mahalanobis(x, means[vowel], covs[vowel])
# append poles and bandwidths to list of values
# (if F3 and bandwidth measurements exist, add to list of appended values)
if len(poles) > 2:
values.append(
[poles[i], poles[j], bandwidths[i], bandwidths[j], poles[2], bandwidths[2]])
else:
values.append([poles[i], poles[j], bandwidths[i], bandwidths[j], '', ''])
# append corresponding Mahalanobis distance to list of
# distances
distances.append(dist)
# we need to append something to the distances and values lists so that the winnerIndex still corresponds with nFormants!
# (this is for the case that the selected formant frame only contains F1 - empty string will not be selected as minimum distance)
else:
# if there are gaps in the formant tracks and the vowel duration is
# short, the whole formant track may disappear during smoothing
if len(poles) == 1 and len(bandwidths) == 1:
values.append([poles[0], '', bandwidths[0], '', '', ''])
if vowel in means:
for poles, bandwidths in zip(selectedpoles, selectedbandwidths):
# check that there are at least two formants in the selected frame
if len(poles) >= 2:
# nPoles = len(poles) ## number of poles
# check all possible combinations of F1, F2, F3:
# for i in range(min([nPoles - 1, 2])):
# for j in range(i+1, min([nPoles, 3])):
i = 0
j = 1
# vector with current pole combination and associated
# bandwidths
x = np.array([poles[i], poles[j], math.log(bandwidths[i]), math.log(bandwidths[j])])
# calculate Mahalanobis distance between x and ANAE mean
dist = mahalanobis(x, means[vowel], covs[vowel])
# append poles and bandwidths to list of values
# (if F3 and bandwidth measurements exist, add to list of appended values)
if len(poles) > 2:
values.append(
[poles[i], poles[j], bandwidths[i], bandwidths[j], poles[2], bandwidths[2]])
else:
values.append([poles[i], poles[j], bandwidths[i], bandwidths[j], '', ''])
# append corresponding Mahalanobis distance to list of
# distances
distances.append(dist)
# we need to append something to the distances and values lists so that the winnerIndex still corresponds with nFormants!
# (this is for the case that the selected formant frame only contains F1 - empty string will not be selected as minimum distance)
else:
values.append(['', '', '', '', '', ''])
distances.append('')
# get index for minimum Mahalanobis distance
winnerIndex = distances.index(min(distances))
# get corresponding F1, F2 and bandwidths values
f1 = values[winnerIndex][0]
f2 = values[winnerIndex][1]
f3 = values[winnerIndex][4]
# if there is a "gap" in the wave form at the point of measurement, the bandwidths returned will be empty,
# and the following will cause an error...
if values[winnerIndex][2]:
b1 = values[winnerIndex][2]
else:
b1 = ''
if values[winnerIndex][3]:
b2 = values[winnerIndex][3]
else:
b2 = ''
if values[winnerIndex][5]:
b3 = values[winnerIndex][5]
# if there are gaps in the formant tracks and the vowel duration is
# short, the whole formant track may disappear during smoothing
if len(poles) == 1 and len(bandwidths) == 1:
values.append([poles[0], '', bandwidths[0], '', '', ''])
else:
values.append(['', '', '', '', '', ''])
distances.append('')
# get index for minimum Mahalanobis distance
winnerIndex = distances.index(min(distances))
# get corresponding F1, F2 and bandwidths values
f1 = values[winnerIndex][0]
f2 = values[winnerIndex][1]
f3 = values[winnerIndex][4]
# if there is a "gap" in the wave form at the point of measurement, the bandwidths returned will be empty,
# and the following will cause an error...
if values[winnerIndex][2]:
b1 = values[winnerIndex][2]
else:
b1 = ''
if values[winnerIndex][3]:
b2 = values[winnerIndex][3]
else:
b2 = ''
if values[winnerIndex][5]:
b3 = values[winnerIndex][5]
else:
b3 = ''
# return tuple of measurements
else:
b3 = ''
# return tuple of measurements
winnerIndex = 2
f1 = selectedpoles[2][0]
f2 = selectedpoles[2][1]
f3 = selectedpoles[2][2]
if selectedbandwidths[2][0]:
b1 = selectedbandwidths[2][0]
else:
b1 = ''
if selectedbandwidths[2][1]:
b2 = selectedbandwidths[2][1]
else:
b2 = ''
if selectedbandwidths[2][2]:
b3 = selectedbandwidths[2][2]
else:
b3 = ''

return (f1, f2, f3, b1, b2, b3, winnerIndex)


Expand Down
17 changes: 12 additions & 5 deletions FAVE-extract/bin/plotnik.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
# Plotnik vowel classes (in the order that they appear in the Plotnik side bar)
PLOTNIKCODES = [
'1', '2', '3', '5', '6', '7', '8', '11', '12', '21', '22', '41', '47', '61', '82',
'72', '73', '62', '63', '42', '33', '43', '53', '14', '24', '44', '54', '64', '74', '94', '31', '39']
'72', '73', '62', '63', '42', '33', '43', '53', '14', '24', '44', '54', '64', '74', '94', '31', '39', '*']

# ARPABET phonesets
CONSONANTS = ['B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH', 'K', 'L', 'M',
Expand Down Expand Up @@ -115,9 +115,10 @@ class VowelMeasurement:
t = 0 # time of measurement


def arpabet2plotnik(ac, trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs):
def arpabet2plotnik(ac, stress, trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs):
"""translates Arpabet transcription of vowels into codes for Plotnik vowel classes"""
# ac = Arpabet coding (without stress digit)
# stress = stress digit
# trans = (orthographic) transcription of token
# prec_p = preceding phone
# foll_p = following phone
Expand Down Expand Up @@ -152,6 +153,9 @@ def arpabet2plotnik(ac, trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs):
elif foll_p != ''and phoneset[foll_p].ctype == 'r' and ac != 'ER':
pc = A2P_R[ac]
# all other cases:

elif ac == "AH" and stress == '0':
pc = "*"
else:
pc = A2P[ac]

Expand Down Expand Up @@ -244,8 +248,11 @@ def cmu2plotnik_code(i, phones, trans, phoneset, speaker, vowelSystem):

# convert CMU (Arpabet) transcription into Plotnik code
# ("label[:-1]": without stress digit)
code = arpabet2plotnik(re.findall(r'^([A-Z]{2,2})\d?$', phones[i].label.upper())[
0], trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs)
code = arpabet2plotnik(re.findall(r'^([A-Z]{2,2})\d?$',
phones[i].label.upper())[0],
re.findall(r'^[A-Z]+(\d)$',
phones[i].label.upper())[0],
trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs)

# adjust vowel class assignment for Philadelphia system
# try:
Expand Down Expand Up @@ -772,7 +779,7 @@ def plt_vowels(cd):
'5':"o",
'6':"uh",
'7':"u",
'*':"*",
'*':"@",
'11':"iy",
'12':"iyF",
'21':"ey",
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
NEWS

dev

* AH0 is now mapped to schwa (`plt_vclass = @`) instead of wedge. (@jofrhwld)
* When a vowel class isn't in means.txt or covs.txt, default to `nFormants = 5` for the first pass. This was necessary to support schwa. (@jofrhwld)

*

v1.2

* Two major updates to user interface with FAVE-extract. (@jofrhwld)
Expand Down

0 comments on commit 049aa62

Please sign in to comment.