The goal of syllabifyr
is to provide tidy syllabification of phonetic
transcriptions. So far, only CMU dict transcriptions are supported.
I’ve largely utilized the same approach as Kyle Gorman’s Python implementation.
syllabifyr
is now on CRAN:
install.packages("syllabifyr")
You can also install syllabifyr
from GitHub with:
# install.packages("devtools")
devtools::install_github("JoFrhwld/syllabifyr")
library(syllabifyr)
syllabify("AO0 S T R EY1 L Y AH0")
#> # A tibble: 8 × 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 nucleus AO 0
#> 2 2 onset S 1
#> 3 2 onset T 1
#> 4 2 onset R 1
#> 5 2 nucleus EY 1
#> 6 2 coda L 1
#> 7 3 onset Y 0
#> 8 3 nucleus AH 0
syllabify(c("AO0", "S", "T", "R", "EY1", "L", "Y", "AH0"))
#> # A tibble: 8 × 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 nucleus AO 0
#> 2 2 onset S 1
#> 3 2 onset T 1
#> 4 2 onset R 1
#> 5 2 nucleus EY 1
#> 6 2 coda L 1
#> 7 3 onset Y 0
#> 8 3 nucleus AH 0
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.3.1
#> Warning: package 'lubridate' was built under R version 4.3.1
syllabficiation <- tribble(~word, ~transcription,
"Alaska", "AH0 L AE1 S K AH0",
"constraint", "K AH0 N S T R EY1 N T",
"canyon", "K AE1 N Y AH0 N",
"value", "V AE1 L Y UW0")%>%
mutate(syllable_df = map(transcription, syllabify))
syllabficiation$syllable_df[[1]]
#> # A tibble: 6 × 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 nucleus AH 0
#> 2 2 onset L 1
#> 3 2 nucleus AE 1
#> 4 2 coda S 1
#> 5 3 onset K 0
#> 6 3 nucleus AH 0
syllabficiation$syllable_df[[2]]
#> # A tibble: 9 × 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 onset K 0
#> 2 1 nucleus AH 0
#> 3 1 coda N 0
#> 4 2 onset S 1
#> 5 2 onset T 1
#> 6 2 onset R 1
#> 7 2 nucleus EY 1
#> 8 2 coda N 1
#> 9 2 coda T 1
syllabficiation$syllable_df[[3]]
#> # A tibble: 6 × 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 onset K 1
#> 2 1 nucleus AE 1
#> 3 1 coda N 1
#> 4 2 onset Y 0
#> 5 2 nucleus AH 0
#> 6 2 coda N 0
syllabficiation$syllable_df[[4]]
#> # A tibble: 5 × 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 onset V 1
#> 2 1 nucleus AE 1
#> 3 1 coda L 1
#> 4 2 onset Y 0
#> 5 2 nucleus UW 0