From 17af9b5d060490ecc7a0955f1b3f4ded97f9f69d Mon Sep 17 00:00:00 2001
From: Bai Li
Date: Fri, 28 Sep 2018 01:38:23 -0400
Subject: [PATCH] Pipeline for Praat syllable nuclei script

---
Reviewer notes (commentary only; not part of the commit message or the diff):

* This copy of the patch had its line breaks and indentation collapsed.
  Line boundaries were restored so that every hunk matches the line counts
  in its "@@" header. Indentation (4 spaces assumed for Python and Praat)
  and the position of blank context lines are reconstructed, not recovered.
  Run `git apply --check` first; `--ignore-whitespace` may be needed if the
  context lines do not match the target files exactly.
* config.py passes None as the default for path_to_praat, and
  PraatRunner.run places that value directly in the subprocess argument
  list. If the setting is missing and None is what _get_var returns, the
  node would fail inside subprocess.call instead of reporting a clear
  configuration error -- confirm against _get_var.
* PraatRunner.run opens the output CSV before Praat runs and leaves it on
  disk when Praat exits non-zero. Presumably file_utils.should_run decides
  based on the output file; if so, a failed run could be skipped on the
  next invocation -- TODO confirm.
* The script path 'scripts/syllable_nuclei_v2.praat' is relative, so the
  node depends on the process working directory.
* syllable_nuclei_v2.praat reads timepeaks1 / int1 and divides by
  voicedcount and speakingtot without first checking that any peaks or
  sounding intervals were found.

 README.md                        |   6 +
 config.py                        |   1 +
 nodes/audio.py                   |  22 ++++
 pipelines/pipelines.py           |  11 +-
 scripts/syllable_nuclei_v2.praat | 188 +++++++++++++++++++++++++++++++
 5 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 scripts/syllable_nuclei_v2.praat

diff --git a/README.md b/README.md
index 29e261e..84c2ca2 100644
--- a/README.md
+++ b/README.md
@@ -205,6 +205,12 @@ path_to_rst_python=path/to/deps/RST/rstenv/bin/python
 path_to_rst=path/to/deps/RST/src/
 ```
 
+##### Praat Syllable Nuclei Detection
+Add the following line to 'config.ini':
+```bash
+path_to_praat=path/to/deps/praat_barren
+```
+
 
 
 
diff --git a/config.py b/config.py
index f4420a5..caf3eeb 100644
--- a/config.py
+++ b/config.py
@@ -43,6 +43,7 @@ def _get_var(key, default=DEFAULT_NOT_SET):
 path_to_stanford_cp = _get_var("path_to_stanford_cp")
 path_to_lda_model = _get_var("path_to_lda_model")
 path_to_lda_wordids = _get_var("path_to_lda_wordids")
+path_to_praat = _get_var("path_to_praat", None)
 
 nltk_data = _get_var("NLTK_DATA", None)
 
diff --git a/nodes/audio.py b/nodes/audio.py
index c775ae8..db69f0a 100644
--- a/nodes/audio.py
+++ b/nodes/audio.py
@@ -2,12 +2,14 @@ from abc import ABC, abstractmethod
 
 import os
 import logging
+import subprocess
 
 from nodes.helper import FileOutputNode
 from utils import file_utils
 from utils import signal_processing as sp
 from utils.shell_run import shell_run
 from config import OPENSMILE_HOME
+from config import path_to_praat
 
 class Mp3ToWav(FileOutputNode):
     def run(self, mp3_file):
@@ -183,3 +185,23 @@ def run(self, in_file):
             f.write(extra_info)
 
         self.emit([seg_path, extra_path])
+
+
+class PraatRunner(FileOutputNode):
+    def run(self, in_file):
+        self.log(logging.INFO, "Starting %s" % (in_file))
+
+        out_file = self.derive_new_file_path(in_file, 'csv')
+
+        if file_utils.should_run(in_file, out_file):
+            cmd = [path_to_praat, '--run', 'scripts/syllable_nuclei_v2.praat', in_file]
+            with open(out_file, 'w') as out_file_handle:
+                res = subprocess.call(cmd, stdout=out_file_handle)
+
+            if res != 0:
+                self.log(logging.ERROR,"Failed %s -> %s with error code %i. cmd: %s" % (in_file, out_file, res, " ".join(cmd)))
+                return
+
+            self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))
+
+        self.emit([out_file])
diff --git a/pipelines/pipelines.py b/pipelines/pipelines.py
index e0c2e7b..adcb1cb 100644
--- a/pipelines/pipelines.py
+++ b/pipelines/pipelines.py
@@ -69,4 +69,13 @@ def lex(in_folder, out_folder, num_threads):
 
     p = ProgressPipeline(file_finder | feats, n_threads=num_threads, quiet=True)
 
-    return p
\ No newline at end of file
+    return p
+
+@pipeline_registry
+def praat_syllable_nuclei(in_folder, out_folder, num_threads):
+    file_finder = helper.FindFiles("file_finder", dir=in_folder, ext=".wav")
+
+    praat = audio.PraatRunner("praat_syllable_nuclei", out_dir=out_folder)
+    p = ProgressPipeline(file_finder | praat, n_threads=num_threads, quiet=True)
+
+    return p
diff --git a/scripts/syllable_nuclei_v2.praat b/scripts/syllable_nuclei_v2.praat
new file mode 100644
index 0000000..d86a870
--- /dev/null
+++ b/scripts/syllable_nuclei_v2.praat
@@ -0,0 +1,188 @@
+# Modified from Praat Script Syllable Nuclei v2
+# https://sites.google.com/site/speechrate/Home/praat-script-syllable-nuclei-v2
+
+form GetFileName command line calls
+    sentence infile
+endform
+
+
+# Default parameters
+silencedb = -25
+mindip = 2
+showtext = 1
+minpause = 0.3
+
+# print a single header line with column names and units
+printline soundname, nsyll, npause, dur (s), phonationtime (s), speechrate (nsyll/dur), articulation rate (nsyll / phonationtime), ASD (speakingtime/nsyll)
+
+# read the file
+Read from file... 'infile$'
+
+# use object ID
+soundname$ = selected$("Sound")
+soundid = selected("Sound")
+
+originaldur = Get total duration
+# allow non-zero starting time
+bt = Get starting time
+
+# Use intensity to get threshold
+To Intensity... 50 0 yes
+intid = selected("Intensity")
+start = Get time from frame number... 1
+nframes = Get number of frames
+end = Get time from frame number... 'nframes'
+
+# estimate noise floor
+minint = Get minimum... 0 0 Parabolic
+# estimate noise max
+maxint = Get maximum... 0 0 Parabolic
+#get .99 quantile to get maximum (without influence of non-speech sound bursts)
+max99int = Get quantile... 0 0 0.99
+
+# estimate Intensity threshold
+threshold = max99int + silencedb
+threshold2 = maxint - max99int
+threshold3 = silencedb - threshold2
+if threshold < minint
+    threshold = minint
+endif
+
+# get pauses (silences) and speakingtime
+To TextGrid (silences)... threshold3 minpause 0.1 silent sounding
+textgridid = selected("TextGrid")
+silencetierid = Extract tier... 1
+silencetableid = Down to TableOfReal... sounding
+nsounding = Get number of rows
+npauses = 'nsounding'
+speakingtot = 0
+for ipause from 1 to npauses
+    beginsound = Get value... 'ipause' 1
+    endsound = Get value... 'ipause' 2
+    speakingdur = 'endsound' - 'beginsound'
+    speakingtot = 'speakingdur' + 'speakingtot'
+endfor
+
+select 'intid'
+Down to Matrix
+matid = selected("Matrix")
+# Convert intensity to sound
+To Sound (slice)... 1
+sndintid = selected("Sound")
+
+# use total duration, not end time, to find out duration of intdur
+# in order to allow nonzero starting times.
+intdur = Get total duration
+intmax = Get maximum... 0 0 Parabolic
+
+# estimate peak positions (all peaks)
+To PointProcess (extrema)... Left yes no Sinc70
+ppid = selected("PointProcess")
+
+numpeaks = Get number of points
+
+# fill array with time points
+for i from 1 to numpeaks
+    t'i' = Get time from index... 'i'
+endfor
+
+
+# fill array with intensity values
+select 'sndintid'
+peakcount = 0
+for i from 1 to numpeaks
+    value = Get value at time... t'i' Cubic
+    if value > threshold
+        peakcount += 1
+        int'peakcount' = value
+        timepeaks'peakcount' = t'i'
+    endif
+endfor
+
+
+# fill array with valid peaks: only intensity values if preceding
+# dip in intensity is greater than mindip
+select 'intid'
+validpeakcount = 0
+currenttime = timepeaks1
+currentint = int1
+
+for p to peakcount-1
+    following = p + 1
+    followingtime = timepeaks'following'
+    dip = Get minimum... 'currenttime' 'followingtime' None
+    diffint = abs(currentint - dip)
+
+    if diffint > mindip
+        validpeakcount += 1
+        validtime'validpeakcount' = timepeaks'p'
+    endif
+    currenttime = timepeaks'following'
+    currentint = Get value at time... timepeaks'following' Cubic
+endfor
+
+
+# Look for only voiced parts
+select 'soundid'
+To Pitch (ac)... 0.02 30 4 no 0.03 0.25 0.01 0.35 0.25 450
+# keep track of id of Pitch
+pitchid = selected("Pitch")
+
+voicedcount = 0
+for i from 1 to validpeakcount
+    querytime = validtime'i'
+
+    select 'textgridid'
+    whichinterval = Get interval at time... 1 'querytime'
+    whichlabel$ = Get label of interval... 1 'whichinterval'
+
+    select 'pitchid'
+    value = Get value at time... 'querytime' Hertz Linear
+
+    if value <> undefined
+        if whichlabel$ = "sounding"
+            voicedcount = voicedcount + 1
+            voicedpeak'voicedcount' = validtime'i'
+        endif
+    endif
+endfor
+
+
+# calculate time correction due to shift in time for Sound object versus
+# intensity object
+timecorrection = originaldur/intdur
+
+# Insert voiced peaks in TextGrid
+if showtext > 0
+    select 'textgridid'
+    Insert point tier... 1 syllables
+
+    for i from 1 to voicedcount
+        position = voicedpeak'i' * timecorrection
+        Insert point... 1 position 'i'
+    endfor
+endif
+
+# clean up before next sound file is opened
+select 'intid'
+plus 'matid'
+plus 'sndintid'
+plus 'ppid'
+plus 'pitchid'
+plus 'silencetierid'
+plus 'silencetableid'
+
+Remove
+if showtext < 1
+    select 'soundid'
+    plus 'textgridid'
+    Remove
+endif
+
+# summarize results in Info window
+speakingrate = 'voicedcount'/'originaldur'
+articulationrate = 'voicedcount'/'speakingtot'
+npause = 'npauses'-1
+asd = 'speakingtot'/'voicedcount'
+
+printline 'soundname$', 'voicedcount', 'npause', 'originaldur:2', 'speakingtot:2', 'speakingrate:2', 'articulationrate:2', 'asd:3'