Merge branch 'dev'

JoFrhwld · Feb 19, 2015 · 049aa62 · 049aa62
2 parents 0c7f71c + 5ebbbc1
commit 049aa62
Show file tree

Hide file tree

Showing 4 changed files with 125 additions and 66 deletions.
diff --git a/FAVE-align/FAAValign.py b/FAVE-align/FAAValign.py
@@ -56,6 +56,9 @@
 
 -n, --noprompt ("no prompt"):
 
     User is not prompted for the transcription of words not in the dictionary, or truncated words.  Unknown words are ignored by the aligner.
+
+-t HTKTOOLSPATH, --htktoolspath=HTKTOOLSPATH
+    Specifies the path to the HTKTools directory where the HTK executable files are located.  If not specified, the user's path will be searched for the location of the executable.
 """
 
@@ -77,6 +80,7 @@
 import traceback
 import codecs
 import subprocess
+import string
 
 truncated = re.compile(r'\w+\-$')                       ## truncated words
 intended = re.compile(r'^\+\w+')                        ## intended word (inserted by transcribers after truncated word)
@@ -503,6 +507,23 @@ def check_transcription(w):
 
     return final_trans
 
+# substitute any 'smart' quotes in the input file with the corresponding
+# ASCII equivalents (otherwise they will be excluded as out-of-
+# vocabulary with respect to the CMU pronouncing dictionary)
+# WARNING: this function currently only works for UTF-8 input
+def replace_smart_quotes(all_input):  # builds and returns a new list of lines; the input sequence itself is not modified
+  cleaned_lines = []
+  for line in all_input:
+    line = line.replace(u'\u2018', "'")  # U+2018 LEFT SINGLE QUOTATION MARK -> ASCII apostrophe
+    line = line.replace(u'\u2019', "'")  # U+2019 RIGHT SINGLE QUOTATION MARK -> ASCII apostrophe
+    line = line.replace(u'\u201a', "'")  # U+201A SINGLE LOW-9 QUOTATION MARK -> ASCII apostrophe
+    line = line.replace(u'\u201b', "'")  # U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK -> ASCII apostrophe
+    line = line.replace(u'\u201c', '"')  # U+201C LEFT DOUBLE QUOTATION MARK -> ASCII double quote
+    line = line.replace(u'\u201d', '"')  # U+201D RIGHT DOUBLE QUOTATION MARK -> ASCII double quote
+    line = line.replace(u'\u201e', '"')  # U+201E DOUBLE LOW-9 QUOTATION MARK -> ASCII double quote
+    line = line.replace(u'\u201f', '"')  # U+201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK -> ASCII double quote
+    cleaned_lines.append(line)
+  return cleaned_lines
 
 def check_transcription_file(all_input):
     """checks the format of the input transcription file and returns a list of empty lines to be deleted from the input"""
@@ -713,13 +734,15 @@ def define_options_and_arguments():
     verbose_help = """Detailed output on status of dictionary check and alignment progress."""
     dict_help = """Specifies the name of the file containing the pronunciation dictionary.  Default file is "/model/dict"."""
     noprompt_help = """User is not prompted for the transcription of words not in the dictionary, or truncated words.  Unknown words are ignored by the aligner."""
+    htktoolspath_help = """Specifies the path to the HTKTools directory where the HTK executable files are located.  If not specified, the user's path will be searched for the location of the executable."""
 
     parser = optparse.OptionParser(usage=new_use, description=new_desc, epilog=new_ep, version=vers)
     parser.add_option('-c', '--check', help=check_help, metavar='FILENAME')                        ## required argument FILENAME
     parser.add_option('-i', '--import', help=import_help, metavar='FILENAME', dest='importfile')   ## required argument FILENAME
     parser.add_option('-v', '--verbose', action='store_true', default=False, help=verbose_help)
     parser.add_option('-d', '--dict', default='model/dict', help=dict_help, metavar='FILENAME')
     parser.add_option('-n', '--noprompt', action='store_true', default=False, help=noprompt_help)
+    parser.add_option('-t', '--htktoolspath', default='', help=htktoolspath_help, metavar='HTKTOOLSPATH')
 
     ## After parsing with (options, args) = parser.parse_args(), options are accessible via
     ## - string options.check (default:  None)
@@ -1150,22 +1173,23 @@ def read_transcription_file(trsfile):
 
     try:  ## try UTF-16 encoding first
         t = codecs.open(trsfile, 'rU', encoding='utf-16')
-        lines = t.readlines()
         print "Encoding is UTF-16!"
+        lines = t.readlines()
     except UnicodeError:
         try:  ## then UTF-8...
             t = codecs.open(trsfile, 'rU', encoding='utf-8')
-            lines = t.readlines()
             print "Encoding is UTF-8!"
+            lines = t.readlines()
+            lines = replace_smart_quotes(lines)
         except UnicodeError:
             try:  ## then Windows encoding...
                 t = codecs.open(trsfile, 'rU', encoding='windows-1252')
-                lines = t.readlines()
                 print "Encoding is Windows-1252!"
+                lines = t.readlines()
             except UnicodeError:
                 t = open(trsfile, 'rU')
-                lines = t.readlines()
                 print "Encoding is ASCII!"
+                lines = t.readlines()
 
     return lines
 
@@ -1454,7 +1478,7 @@ def write_words(out, unknown):
 ################################################################################
 
 
-def FAAValign(opts, args, FADIR='', SOXPATH='', HTKTOOLSPATH=''):
+def FAAValign(opts, args, FADIR='', SOXPATH=''):
     """runs the forced aligner for the arguments given"""
 
     tempdir = os.path.join(FADIR, TEMPDIR)
@@ -1501,6 +1525,8 @@ def FAAValign(opts, args, FADIR='', SOXPATH='', HTKTOOLSPATH=''):
     style_tier = None
     failed_alignment = []
 
+    HTKTOOLSPATH = options.htktoolspath
+
     ## check correct format of input file; get list of transcription lines
     ## (this function skips empty annotation units -> lines to be deleted)
     if options.verbose:  

diff --git a/FAVE-extract/bin/extractFormants.py b/FAVE-extract/bin/extractFormants.py
@@ -1339,9 +1339,9 @@ def outputFormantSettings(measurements, speaker, outputFile):
     count = {}
     for code in plotnik.PLOTNIKCODES:
         for nf in range(3, 7):
-            count[(int(code), nf)] = 0
+            count[(str(code), nf)] = 0
     for vm in measurements:
-        count[(int(vm.cd), int(vm.nFormants))] += 1
+        count[(str(vm.cd), int(vm.nFormants))] += 1
 
     # filename = name of the output file, but with extension "nFormants"
     outfilename = os.path.splitext(outputFile)[0] + ".nFormants"
@@ -1355,7 +1355,7 @@ def outputFormantSettings(measurements, speaker, outputFile):
     for code in plotnik.PLOTNIKCODES:
         f.write(code)
         for nf in range(3, 7):
-            f.write('\t' + str(count[(int(code), nf)]))
+            f.write('\t' + str(count[(str(code), nf)]))
         f.write('\n')
     f.close()
 
@@ -1601,61 +1601,80 @@ def predictF1F2(phone, selectedpoles, selectedbandwidths, means, covs):
     distances = []
         # this list keeps track of the corresponding value of the Mahalanobis distance
     # for all values of nFormants:
-    for poles, bandwidths in zip(selectedpoles, selectedbandwidths):
-        # check that there are at least two formants in the selected frame
-        if len(poles) >= 2:
-            # nPoles = len(poles)     ## number of poles
-            # check all possible combinations of F1, F2, F3:
-            # for i in range(min([nPoles - 1, 2])):
-            #    for j in range(i+1, min([nPoles, 3])):
-                    i = 0
-                    j = 1
-                    # vector with current pole combination and associated
-                    # bandwidths
-                    x = np.array([poles[i], poles[j], math.log(bandwidths[i]), math.log(bandwidths[j])])
-                    # calculate Mahalanobis distance between x and ANAE mean
-                    dist = mahalanobis(x, means[vowel], covs[vowel])
-                    # append poles and bandwidths to list of values
-                    # (if F3 and bandwidth measurements exist, add to list of appended values)
-                    if len(poles) > 2:
-                        values.append(
-                            [poles[i], poles[j], bandwidths[i], bandwidths[j], poles[2], bandwidths[2]])
-                    else:
-                        values.append([poles[i], poles[j], bandwidths[i], bandwidths[j], '', ''])
-                    # append corresponding Mahalanobis distance to list of
-                    # distances
-                    distances.append(dist)
-        # we need to append something to the distances and values lists so that the winnerIndex still corresponds with nFormants!
-        # (this is for the case that the selected formant frame only contains F1 - empty string will not be selected as minimum distance)
-        else:
-            # if there are gaps in the formant tracks and the vowel duration is
-            # short, the whole formant track may disappear during smoothing
-            if len(poles) == 1 and len(bandwidths) == 1:
-                values.append([poles[0], '', bandwidths[0], '', '', ''])
+    if vowel in means:
+        for poles, bandwidths in zip(selectedpoles, selectedbandwidths):
+            # check that there are at least two formants in the selected frame
+            if len(poles) >= 2:
+                # nPoles = len(poles)     ## number of poles
+                # check all possible combinations of F1, F2, F3:
+                # for i in range(min([nPoles - 1, 2])):
+                #    for j in range(i+1, min([nPoles, 3])):
+                        i = 0
+                        j = 1
+                        # vector with current pole combination and associated
+                        # bandwidths
+                        x = np.array([poles[i], poles[j], math.log(bandwidths[i]), math.log(bandwidths[j])])
+                        # calculate Mahalanobis distance between x and ANAE mean
+                        dist = mahalanobis(x, means[vowel], covs[vowel])
+                        # append poles and bandwidths to list of values
+                        # (if F3 and bandwidth measurements exist, add to list of appended values)
+                        if len(poles) > 2:
+                            values.append(
+                                [poles[i], poles[j], bandwidths[i], bandwidths[j], poles[2], bandwidths[2]])
+                        else:
+                            values.append([poles[i], poles[j], bandwidths[i], bandwidths[j], '', ''])
+                        # append corresponding Mahalanobis distance to list of
+                        # distances
+                        distances.append(dist)
+            # we need to append something to the distances and values lists so that the winnerIndex still corresponds with nFormants!
+            # (this is for the case that the selected formant frame only contains F1 - empty string will not be selected as minimum distance)
             else:
-                values.append(['', '', '', '', '', ''])
-            distances.append('')
-    # get index for minimum Mahalanobis distance
-    winnerIndex = distances.index(min(distances))
-    # get corresponding F1, F2 and bandwidths values
-    f1 = values[winnerIndex][0]
-    f2 = values[winnerIndex][1]
-    f3 = values[winnerIndex][4]
-    # if there is a "gap" in the wave form at the point of measurement, the bandwidths returned will be empty,
-    # and the following will cause an error...
-    if values[winnerIndex][2]:
-        b1 = values[winnerIndex][2]
-    else:
-        b1 = ''
-    if values[winnerIndex][3]:
-        b2 = values[winnerIndex][3]
-    else:
-        b2 = ''
-    if values[winnerIndex][5]:
-        b3 = values[winnerIndex][5]
+                # if there are gaps in the formant tracks and the vowel duration is
+                # short, the whole formant track may disappear during smoothing
+                if len(poles) == 1 and len(bandwidths) == 1:
+                    values.append([poles[0], '', bandwidths[0], '', '', ''])
+                else:
+                    values.append(['', '', '', '', '', ''])
+                distances.append('')
+        # get index for minimum Mahalanobis distance
+        winnerIndex = distances.index(min(distances))
+        # get corresponding F1, F2 and bandwidths values
+        f1 = values[winnerIndex][0]
+        f2 = values[winnerIndex][1]
+        f3 = values[winnerIndex][4]
+        # if there is a "gap" in the wave form at the point of measurement, the bandwidths returned will be empty,
+        # and the following will cause an error...
+        if values[winnerIndex][2]:
+            b1 = values[winnerIndex][2]
+        else:
+            b1 = ''
+        if values[winnerIndex][3]:
+            b2 = values[winnerIndex][3]
+        else:
+            b2 = ''
+        if values[winnerIndex][5]:
+            b3 = values[winnerIndex][5]
+        else:
+            b3 = ''
+        # return tuple of measurements
     else:
-        b3 = ''
-    # return tuple of measurements
+        winnerIndex = 2
+        f1 = selectedpoles[2][0]
+        f2 = selectedpoles[2][1]
+        f3 = selectedpoles[2][2]
+        if selectedbandwidths[2][0]:
+            b1 = selectedbandwidths[2][0]
+        else:
+            b1 = ''
+        if selectedbandwidths[2][1]:
+            b2 = selectedbandwidths[2][1]
+        else:
+            b2 = ''
+        if selectedbandwidths[2][2]:
+            b3 = selectedbandwidths[2][2]
+        else:
+            b3 = ''
+
     return (f1, f2, f3, b1, b2, b3, winnerIndex)
 
 

diff --git a/FAVE-extract/bin/plotnik.py b/FAVE-extract/bin/plotnik.py
@@ -56,7 +56,7 @@
 # Plotnik vowel classes (in the order that they appear in the Plotnik side bar)
 PLOTNIKCODES = [
     '1', '2', '3', '5', '6', '7', '8', '11', '12', '21', '22', '41', '47', '61', '82',
-    '72', '73', '62', '63', '42', '33', '43', '53', '14', '24', '44', '54', '64', '74', '94', '31', '39']
+    '72', '73', '62', '63', '42', '33', '43', '53', '14', '24', '44', '54', '64', '74', '94', '31', '39', '*']
 
 # ARPABET phonesets
 CONSONANTS = ['B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH', 'K', 'L', 'M',
@@ -115,9 +115,10 @@ class VowelMeasurement:
     t = 0  # time of measurement
 
 
-def arpabet2plotnik(ac, trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs):
+def arpabet2plotnik(ac, stress, trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs):
     """translates Arpabet transcription of vowels into codes for Plotnik vowel classes"""
     # ac = Arpabet coding (without stress digit)
+    # stress = stress digit
     # trans = (orthographic) transcription of token
     # prec_p = preceding phone
     # foll_p = following phone
@@ -152,6 +153,9 @@ def arpabet2plotnik(ac, trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs):
     elif foll_p != ''and phoneset[foll_p].ctype == 'r' and ac != 'ER':
         pc = A2P_R[ac]
     # all other cases:
+
+    elif ac == "AH" and stress == '0':
+        pc = "*"
     else:
         pc = A2P[ac]
 
@@ -244,8 +248,11 @@ def cmu2plotnik_code(i, phones, trans, phoneset, speaker, vowelSystem):
 
     # convert CMU (Arpabet) transcription into Plotnik code
     # ("label[:-1]":  without stress digit)
-    code = arpabet2plotnik(re.findall(r'^([A-Z]{2,2})\d?$', phones[i].label.upper())[
-                           0], trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs)
+    code = arpabet2plotnik(re.findall(r'^([A-Z]{2,2})\d?$', 
+                                      phones[i].label.upper())[0], 
+                            re.findall(r'^[A-Z]+(\d)$',
+                                       phones[i].label.upper())[0],
+                            trans, prec_p, foll_p, phoneset, fm, fp, fv, ps, fs)
 
     # adjust vowel class assignment for Philadelphia system
 #  try:
@@ -772,7 +779,7 @@ def plt_vowels(cd):
                   '5':"o",
                   '6':"uh",
                   '7':"u",
-                  '*':"*",
+                  '*':"@",
                   '11':"iy",
                   '12':"iyF",
                   '21':"ey",

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,12 @@
 NEWS
 
+dev
+
+* AH0 is now mapped to schwa (`plt_vclass = @`) instead of wedge. (@jofrhwld)
+* When a vowel class isn't in means.txt or covs.txt, default to `nFormants = 5` for first pass. This was necessary to support schwa (@jofrhwld)
+
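+* FAVE-align: added the `-t`/`--htktoolspath` option for specifying the directory where the HTK executables are located; if it is not given, the user's path is searched.
+* FAVE-align: "smart" quotes in UTF-8 transcription files are now replaced with their ASCII equivalents, so that the affected words are not treated as out-of-vocabulary.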
+
 v1.2
 
 * Two major updates to user interface with FAVE-extract. (@jofrhwld)