From 17af9b5d060490ecc7a0955f1b3f4ded97f9f69d Mon Sep 17 00:00:00 2001
From: Bai Li <bai@cs.toronto.edu>
Date: Fri, 28 Sep 2018 01:38:23 -0400
Subject: [PATCH] Pipeline for Praat syllable nuclei script

---
 README.md                        |   6 +
 config.py                        |   1 +
 nodes/audio.py                   |  22 ++++
 pipelines/pipelines.py           |  11 +-
 scripts/syllable_nuclei_v2.praat | 188 +++++++++++++++++++++++++++++++
 5 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 scripts/syllable_nuclei_v2.praat

diff --git a/README.md b/README.md
index 29e261e..84c2ca2 100644
--- a/README.md
+++ b/README.md
@@ -205,6 +205,12 @@ path_to_rst_python=path/to/deps/RST/rstenv/bin/python
 path_to_rst=path/to/deps/RST/src/
 ```
 
+##### Praat Syllable Nuclei Detection
+Add the following line to 'config.ini', pointing it at the Praat executable used to run the script:
+```bash
+path_to_praat=path/to/deps/praat_barren
+```
+
 
 
 
diff --git a/config.py b/config.py
index f4420a5..caf3eeb 100644
--- a/config.py
+++ b/config.py
@@ -43,6 +43,7 @@ def _get_var(key, default=DEFAULT_NOT_SET):
 path_to_stanford_cp = _get_var("path_to_stanford_cp")
 path_to_lda_model = _get_var("path_to_lda_model")
 path_to_lda_wordids = _get_var("path_to_lda_wordids")
+path_to_praat = _get_var("path_to_praat", None)
 
 
 nltk_data = _get_var("NLTK_DATA", None)
diff --git a/nodes/audio.py b/nodes/audio.py
index c775ae8..db69f0a 100644
--- a/nodes/audio.py
+++ b/nodes/audio.py
@@ -2,12 +2,14 @@
 from abc import ABC, abstractmethod
 import os
 import logging
+import subprocess
 
 from nodes.helper import FileOutputNode
 from utils import file_utils
 from utils import signal_processing as sp
 from utils.shell_run import shell_run
 from config import OPENSMILE_HOME
+from config import path_to_praat
 
 class Mp3ToWav(FileOutputNode):
     def run(self, mp3_file):
@@ -183,3 +185,23 @@ def run(self, in_file):
                     f.write(extra_info)
 
             self.emit([seg_path, extra_path])
+
+
+class PraatRunner(FileOutputNode):
+    """Runs the Praat syllable nuclei script on in_file, writing its stdout to a csv that is emitted unless Praat fails."""
+    def run(self, in_file):
+        self.log(logging.INFO, "Starting %s" % (in_file))
+        out_file = self.derive_new_file_path(in_file, 'csv')
+
+        if file_utils.should_run(in_file, out_file):
+            cmd = [path_to_praat, '--run', 'scripts/syllable_nuclei_v2.praat', in_file]
+            with open(out_file, 'w') as out_file_handle:
+                res = subprocess.call(cmd, stdout=out_file_handle)
+
+            if res != 0:
+                # Drop the partial csv so a failed run does not leave behind a file that looks like a result.
+                os.remove(out_file)
+                self.log(logging.ERROR,"Failed %s -> %s with error code %i. cmd: %s" % (in_file, out_file, res, " ".join(cmd)))
+                return
+            self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))
+        self.emit([out_file])
diff --git a/pipelines/pipelines.py b/pipelines/pipelines.py
index e0c2e7b..adcb1cb 100644
--- a/pipelines/pipelines.py
+++ b/pipelines/pipelines.py
@@ -69,4 +69,13 @@ def lex(in_folder, out_folder, num_threads):
 
     p = ProgressPipeline(file_finder | feats, n_threads=num_threads, quiet=True)
 
-    return p
\ No newline at end of file
+    return p
+
+@pipeline_registry
+def praat_syllable_nuclei(in_folder, out_folder, num_threads):
+    """Return a ProgressPipeline chaining a '.wav' FindFiles node on in_folder into a PraatRunner with out_dir=out_folder."""
+    wav_finder = helper.FindFiles("file_finder", dir=in_folder, ext=".wav")
+    nuclei_node = audio.PraatRunner("praat_syllable_nuclei", out_dir=out_folder)
+
+    # num_threads is handed over as n_threads; quiet=True matches the lex pipeline defined above.
+    return ProgressPipeline(wav_finder | nuclei_node, n_threads=num_threads, quiet=True)
diff --git a/scripts/syllable_nuclei_v2.praat b/scripts/syllable_nuclei_v2.praat
new file mode 100644
index 0000000..d86a870
--- /dev/null
+++ b/scripts/syllable_nuclei_v2.praat
@@ -0,0 +1,188 @@
+# Counts syllable nuclei in one sound file and prints a csv summary; modified from Praat Script Syllable Nuclei v2
+# https://sites.google.com/site/speechrate/Home/praat-script-syllable-nuclei-v2
+
+form GetFileName command line calls
+    sentence infile
+endform
+
+
+# Default parameters (silencedb and mindip are intensity differences; minpause is the minimum silence duration)
+silencedb = -25
+mindip = 2
+showtext = 1
+minpause = 0.3
+ 
+# print a single csv header line with column names and units
+printline soundname, nsyll, npause, dur (s), phonationtime (s), speechrate (nsyll/dur), articulation rate (nsyll / phonationtime), ASD (speakingtime/nsyll)
+
+# read the sound file given as the command line argument
+Read from file... 'infile$'
+
+# remember the name and object ID of the loaded Sound so it can be re-selected later
+soundname$ = selected$("Sound")
+soundid = selected("Sound")
+
+originaldur = Get total duration
+# allow non-zero starting time (bt is read here but not used later in this script)
+bt = Get starting time
+
+# Use intensity to get threshold (start and end are computed here but not used further)
+To Intensity... 50 0 yes
+intid = selected("Intensity")
+start = Get time from frame number... 1
+nframes = Get number of frames
+end = Get time from frame number... 'nframes'
+
+# estimate noise floor (minimum of the intensity contour)
+minint = Get minimum... 0 0 Parabolic
+# estimate noise max (maximum of the intensity contour)
+maxint = Get maximum... 0 0 Parabolic
+# get .99 quantile as a robust maximum (without influence of non-speech sound bursts)
+max99int = Get quantile... 0 0 0.99
+
+# estimate Intensity thresholds: 'threshold' for peak picking, 'threshold3' for silence detection
+threshold = max99int + silencedb
+threshold2 = maxint - max99int
+threshold3 = silencedb - threshold2
+if threshold < minint
+   threshold = minint
+endif
+
+# get sounding intervals and sum their durations into speakingtot (npauses holds the number of sounding intervals)
+To TextGrid (silences)... threshold3 minpause 0.1 silent sounding
+textgridid = selected("TextGrid")
+silencetierid = Extract tier... 1
+silencetableid = Down to TableOfReal... sounding
+nsounding = Get number of rows
+npauses = 'nsounding'
+speakingtot = 0
+for ipause from 1 to npauses
+  beginsound = Get value... 'ipause' 1
+  endsound = Get value... 'ipause' 2
+  speakingdur = 'endsound' - 'beginsound'
+  speakingtot = 'speakingdur' + 'speakingtot'
+endfor
+
+select 'intid'
+Down to Matrix
+matid = selected("Matrix")
+# Convert the intensity contour to a Sound object so the peak-finding command below can be applied to it
+To Sound (slice)... 1
+sndintid = selected("Sound")
+
+# use total duration, not end time, to find out duration of intdur
+# in order to allow nonzero starting times.
+intdur = Get total duration
+intmax = Get maximum... 0 0 Parabolic
+
+# estimate peak positions (all maxima of the intensity contour, no minima)
+To PointProcess (extrema)... Left yes no Sinc70
+ppid = selected("PointProcess")
+
+numpeaks = Get number of points
+
+# store the time of every peak in t1 .. t<numpeaks>
+for i from 1 to numpeaks
+   t'i' = Get time from index... 'i'
+endfor 
+
+
+# keep only peaks above threshold: store their intensities (int*) and times (timepeaks*)
+select 'sndintid'
+peakcount = 0
+for i from 1 to numpeaks
+   value = Get value at time... t'i' Cubic
+   if value > threshold
+         peakcount += 1
+         int'peakcount' = value
+         timepeaks'peakcount' = t'i'
+   endif
+endfor
+
+
+# fill array with valid peaks: keep peak p only if the intensity dip between it and the
+# next peak is greater than mindip (the last peak is never evaluated by this loop)
+select 'intid'
+validpeakcount = 0
+currenttime = timepeaks1
+currentint = int1
+
+for p to peakcount-1
+  following = p + 1
+  followingtime = timepeaks'following'
+  dip = Get minimum... 'currenttime' 'followingtime' None
+  diffint = abs(currentint - dip)
+
+  if diffint > mindip
+     validpeakcount += 1
+     validtime'validpeakcount' = timepeaks'p'
+  endif
+     currenttime = timepeaks'following'
+     currentint = Get value at time... timepeaks'following' Cubic
+endfor
+
+
+# Keep only valid peaks that are voiced (defined pitch) and lie inside a "sounding" interval
+select 'soundid' 
+To Pitch (ac)... 0.02 30 4 no 0.03 0.25 0.01 0.35 0.25 450
+# keep track of id of Pitch so it can be re-selected inside the loop and removed afterwards
+pitchid = selected("Pitch")
+
+voicedcount = 0
+for i from 1 to validpeakcount
+  querytime = validtime'i'
+
+  select 'textgridid'
+  whichinterval = Get interval at time... 1 'querytime'
+  whichlabel$ = Get label of interval... 1 'whichinterval'
+
+  select 'pitchid'
+  value = Get value at time... 'querytime' Hertz Linear
+
+  if value <> undefined
+     if whichlabel$ = "sounding"
+         voicedcount = voicedcount + 1
+         voicedpeak'voicedcount' = validtime'i'
+     endif
+  endif
+endfor
+
+
+# calculate time correction due to shift in time for Sound object versus
+# intensity object
+timecorrection = originaldur/intdur
+
+# Insert voiced peaks as numbered points in the TextGrid (the TextGrid is not saved anywhere in this script)
+if showtext > 0
+  select 'textgridid'
+  Insert point tier... 1 syllables
+  
+  for i from 1 to voicedcount
+      position = voicedpeak'i' * timecorrection
+      Insert point... 1 position 'i'
+  endfor
+endif
+
+# clean up intermediate objects (this version processes only the one file given on the command line)
+select 'intid'
+plus 'matid'
+plus 'sndintid'
+plus 'ppid'
+plus 'pitchid'
+plus 'silencetierid'
+plus 'silencetableid'
+
+Remove
+if showtext < 1
+   select 'soundid'
+   plus 'textgridid'
+   Remove
+endif
+
+# summarize results as one csv row matching the header; npause is the number of sounding intervals minus one
+speakingrate = 'voicedcount'/'originaldur'
+articulationrate = 'voicedcount'/'speakingtot'
+npause = 'npauses'-1
+asd = 'speakingtot'/'voicedcount'
+
+printline 'soundname$', 'voicedcount', 'npause', 'originaldur:2', 'speakingtot:2', 'speakingrate:2', 'articulationrate:2', 'asd:3'