diff --git a/g2p/mappings/langs/generated/config-g2p.yaml b/g2p/mappings/langs/generated/config-g2p.yaml index 1cfb893d..fcd91c54 100644 --- a/g2p/mappings/langs/generated/config-g2p.yaml +++ b/g2p/mappings/langs/generated/config-g2p.yaml @@ -1,5 +1,6 @@ mappings: - - case_sensitive: true + - case_equivalencies: {} + case_sensitive: true display_name: Atikamekw IPA to English IPA escape_special: false in_lang: atj-ipa @@ -7,6 +8,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -14,6 +16,7 @@ mappings: - author: Generated 2019-09-26 13:48:19.852180 authors: - Generated 2020-07-06 16:39:58.770641 + case_equivalencies: {} case_sensitive: true display_name: Danish IPA to English IPA escape_special: false @@ -22,6 +25,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -30,6 +34,7 @@ mappings: authors: - Generated 2021-04-14 16:34:39.868437 - Tidied up 2021-04-14 by Eric Joanis + case_equivalencies: {} case_sensitive: true display_name: French IPA to English IPA escape_special: false @@ -38,6 +43,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -45,6 +51,7 @@ mappings: - author: Generated 2019-10-24 16:21:41.605592 authors: - Generated 2022-04-12 11:26:30.098188 + case_equivalencies: {} case_sensitive: true display_name: SENĆOŦEN IPA to English IPA escape_special: false @@ -53,6 +60,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -60,6 +68,7 @@ mappings: - author: Generated 2019-10-24 16:21:41.605592 authors: - Generated 2022-04-12 11:26:30.098188 + case_equivalencies: {} case_sensitive: true display_name: SENĆOŦEN IPA to English IPA escape_special: false @@ -68,11 +77,13 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: str-ipa_to_hamming-eng-ipa.json - author: Generated 2019-10-23 11:18:26.278696 + case_equivalencies: {} case_sensitive: true display_name: Algonquin IPA to English IPA escape_special: false @@ -81,12 +92,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: alq-ipa_to_eng-ipa.json - authors: - Generated 2020-03-06 00:15:29.676518 + case_equivalencies: {} case_sensitive: true display_name: see-ipa IPA to eng-ipa IPA escape_special: false @@ -95,12 +108,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: see-ipa_to_eng-ipa.json - authors: - Generated 2020-03-30 13:17:41.062858 + case_equivalencies: {} case_sensitive: true display_name: lml-ipa IPA to eng-ipa IPA escape_special: false @@ -109,12 +124,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: lml-ipa_to_eng-ipa.json - authors: - Generated 2020-04-27 01:30:32.371649 + case_equivalencies: {} case_sensitive: false display_name: oji-ipa IPA to eng-ipa IPA escape_special: false @@ -123,12 +140,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written rules_path: oji-ipa_to_eng-ipa.json - authors: - Generated 2020-07-28 14:28:24.895117 + case_equivalencies: {} case_sensitive: false display_name: gla-ipa IPA to eng-ipa IPA escape_special: false @@ -137,12 +156,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: gla-ipa_to_eng-ipa.json - authors: - Generated 2020-08-06 16:06:34.805350 + case_equivalencies: {} case_sensitive: true display_name: crk-ipa IPA to eng-ipa IPA escape_special: false @@ -151,12 +172,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: crk-ipa_to_eng-ipa.json - authors: - Generated 2021-03-07 22:23:34.538427 + case_equivalencies: {} case_sensitive: false display_name: tce-ipa IPA to eng-ipa IPA escape_special: false @@ -165,12 +188,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: tce-ipa_to_eng-ipa.json - authors: - Generated 2023-10-04 19:33:36.891594 + case_equivalencies: {} case_sensitive: false display_name: crg-ipa IPA to eng-ipa IPA escape_special: false @@ -179,6 +204,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written @@ -187,6 +213,7 @@ mappings: - Generated 2020-09-18 10:40:15.289530 - Updated by hand 2021-02-10 Eric Joanis to handle ejectives - Update by hand 2021-05-21 Shankhalika Srikanth reorder rules with aspiration + case_equivalencies: {} case_sensitive: false display_name: Tlingit IPA to English IPA escape_special: false @@ -195,12 +222,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written rules_path: tli-ipa_to_eng-ipa.json - authors: - Generated 2020-11-30 22:36:15.639360 + case_equivalencies: {} case_sensitive: false display_name: Gwich'in IPA to English IPA escape_special: false @@ -209,12 +238,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: gwi-ipa_to_eng-ipa.json - authors: - Generated 2022-09-09 13:04:46.128081 + case_equivalencies: {} case_sensitive: false display_name: mic-ipa IPA to eng-ipa IPA escape_special: false @@ -223,12 +254,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written rules_path: mic-ipa_to_eng-ipa.json - authors: - Generated 2021-03-08 17:26:16.371203 + case_equivalencies: {} case_sensitive: true display_name: iku-ipa IPA to eng-ipa IPA escape_special: false @@ -237,12 +270,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: iku-ipa_to_eng-ipa.json - authors: - Generated 2021-03-18 14:51:53.222666 + case_equivalencies: {} case_sensitive: false display_name: ikt-ipa IPA to eng-ipa IPA escape_special: false @@ -251,12 +286,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: ikt-ipa_to_eng-ipa.json - authors: - Generated 2021-03-18 16:13:29.007425 + case_equivalencies: {} case_sensitive: false display_name: iku-sro-ipa IPA to eng-ipa IPA escape_special: false @@ -265,12 +302,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: iku-sro-ipa_to_eng-ipa.json - authors: - Generated 2021-03-23 11:55:07.200851 + case_equivalencies: {} case_sensitive: false display_name: haa-ipa IPA to eng-ipa IPA escape_special: false @@ -279,12 +318,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: haa-ipa_to_eng-ipa.json - authors: - Generated 2021-03-07 23:35:39.198455 + case_equivalencies: {} case_sensitive: false display_name: ttm-ipa IPA to eng-ipa IPA escape_special: false @@ -293,12 +334,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: ttm-ipa_to_eng-ipa.json - authors: - Generated 2021-08-01 19:37:38.316339 + case_equivalencies: {} case_sensitive: false display_name: tau-ipa IPA to eng-ipa IPA escape_special: false @@ -307,12 +350,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: tau-ipa_to_eng-ipa.json - authors: - Generated 2023-08-15 12:26:00.528774 + case_equivalencies: {} case_sensitive: false display_name: moh-ipa IPA to eng-ipa IPA escape_special: false @@ -321,12 +366,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: moh-ipa_to_eng-ipa.json - authors: - Generated 2022-04-12 11:42:52.894700 + case_equivalencies: {} case_sensitive: false display_name: moh-ipa IPA to eng-ipa IPA escape_special: false @@ -335,12 +382,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: moh-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-04-11 16:24:52.967497 + case_equivalencies: {} case_sensitive: true display_name: moh-equiv dummy to dummy dummy escape_special: false @@ -349,12 +398,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: moh-equiv_to_dummy.json - authors: - Generated 2022-04-11 16:24:52.967497 + case_equivalencies: {} case_sensitive: true display_name: moh-equiv dummy to hamming dummy escape_special: false @@ -363,12 +414,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: moh-equiv_to_hamming-dummy.json - authors: - Generated 2022-04-12 10:47:28.528081 + case_equivalencies: {} case_sensitive: true display_name: str-equiv dummy to dummy dummy escape_special: false @@ -377,12 +430,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: str-equiv_to_dummy.json - authors: - Generated 2022-04-12 10:47:28.528081 + case_equivalencies: {} case_sensitive: true display_name: str-equiv dummy to hamming dummy escape_special: false @@ -391,12 +446,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: str-equiv_to_hamming-dummy.json - authors: - Generated 2022-04-12 10:57:49.409626 + case_equivalencies: {} case_sensitive: true display_name: und-ascii dummy to dummy dummy escape_special: false @@ -405,12 +462,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: und-ascii_to_dummy.json - authors: - Generated 2022-04-12 10:56:44.604694 + case_equivalencies: {} case_sensitive: true display_name: und-ascii dummy to dummy dummy escape_special: false @@ -419,12 +478,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: und-ascii_to_hamming-dummy.json - authors: - Generated 2022-05-20 16:36:31.566493 + case_equivalencies: {} case_sensitive: true display_name: iku-ipa to hamming-eng-ipa escape_special: false @@ -433,12 +494,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: iku-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-05-20 16:55:13.188319 + case_equivalencies: {} case_sensitive: true display_name: und-ipa to hamming-eng-ipa escape_special: false @@ -447,12 +510,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: und-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-05-20 19:12:51.416268 + case_equivalencies: {} case_sensitive: true display_name: ikt-ipa to hamming-eng-ipa escape_special: false @@ -461,12 +526,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: ikt-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-06-07 10:24:54.512807 + case_equivalencies: {} case_sensitive: true display_name: moe-ipa to eng-ipa escape_special: false @@ -475,12 +542,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: moe-ipa_to_eng-ipa.json - authors: - Generated 2022-07-22 13:54:50.454043 + case_equivalencies: {} case_sensitive: false display_name: win-ipa IPA to eng-ipa IPA escape_special: false @@ -489,6 +558,7 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first @@ -496,6 +566,7 @@ mappings: - authors: - Generated 2022-07-20 06:53:42.624015 - Updated by hand 2021-07-20 David Huggins-Daines to improve vowels + case_equivalencies: {} case_sensitive: false display_name: fin-ipa IPA to eng-ipa IPA escape_special: false @@ -504,12 +575,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: fin-ipa_to_eng-ipa.json - authors: - Generated 2023-01-13 11:12:51.451010 + case_equivalencies: {} case_sensitive: true display_name: oka-ipa to eng-ipa escape_special: false @@ -518,12 +591,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: oka-ipa_to_eng-ipa.json - authors: - Generated 2023-06-26 15:06:13.793560 + case_equivalencies: {} case_sensitive: true display_name: clm-ipa to eng-ipa escape_special: false @@ -532,7 +607,25 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: clm-ipa_to_eng-ipa.json + - authors: + - Generated 2024-04-18 13:30:09.143401 + - Manual fixes by Eric Joanis + case_equivalencies: {} + case_sensitive: true + display_name: sal-ipa to eng-ipa + escape_special: false + in_lang: sal-ipa + language_name: IPA + norm_form: NFC + out_delimiter: '' + out_lang: eng-ipa + preserve_case: false + prevent_feeding: true + reverse: false + rule_ordering: apply-longest-first + rules_path: sal-ipa_to_eng-ipa.json diff --git a/g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json b/g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json new file mode 100644 index 00000000..69cfb768 --- /dev/null +++ b/g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json @@ -0,0 +1,94 @@ +[ + {"in": "ˈa̠", "out": "æ"}, + {"in": "ˈə̠", "out": "ə"}, + {"in": "h̠ʷ", "out": "hw"}, + {"in": "kʼʷ", "out": "kw"}, + {"in": "qʼʷ", "out": "kw"}, + {"in": "ˈu̠", "out": "u"}, + {"in": "ʕˀʷ", "out": "ʒw"}, + {"in": "x̠ʷ", "out": "kw"}, + {"in": "tsʼ", "out": "ts"}, + {"in": "tsʼ", "out": "ts"}, + {"in": "tɬʼ", "out": "ts"}, + {"in": "ʧʼ", "out": "tʃ"}, + {"in": "χʷ", "out": "ʃw"}, + {"in": "ˈa", "out": "æ"}, + {"in": "a̠", "out": "æ"}, + {"in": "ˈæ", "out": "æ"}, + {"in": "c̠", "out": "k"}, + {"in": "ˈɛ", "out": "ɛ"}, + {"in": "ˈə", "out": "ə"}, + {"in": "ˈʌ", "out": "ʌ"}, + {"in": "ɡʷ", "out": "ɡw"}, + {"in": "ˈi", "out": "i"}, + {"in": "ɣˀ", "out": "ɡ"}, + {"in": "kʼ", "out": "k"}, + {"in": "kʷ", "out": "kw"}, + {"in": "lˀ", "out": "l"}, + {"in": "l̠", "out": "l"}, + {"in": "mˀ", "out": "m"}, + {"in": "nˀ", "out": "n"}, + {"in": "n̠", "out": "n"}, + {"in": "ˈo", "out": "ɔ"}, + {"in": "ˈo", "out": "ɔ"}, + {"in": "ˈɔ", "out": "ɔ"}, + {"in": "pʼ", "out": "p"}, + {"in": "qʼ", "out": "k"}, + {"in": "qʷ", "out": "kw"}, + {"in": "ɾˀ", "out": "ɾ"}, + {"in": "s̠", "out": "s"}, + {"in": "tʼ", "out": "t"}, + {"in": "tʼ", "out": "t"}, + {"in": "ʕˀ", "out": "ʒ"}, + {"in": "ˈu", "out": "u"}, + {"in": "ˈu", "out": "u"}, + {"in": "u̠", "out": "u"}, + {"in": "ʕʷ", "out": "ʒw"}, + {"in": "x̠", "out": "k"}, + {"in": "xʷ", "out": "kw"}, + {"in": "wˀ", "out": "w"}, + {"in": "jˀ", "out": "j"}, + {"in": "zˀ", "out": "z"}, + {"in": "zˀ", "out": "z"}, + {"in": "ts", "out": "ts"}, + {"in": "ʧ", "out": "tʃ"}, + {"in": "ʤ", "out": "dʒ"}, + {"in": "h", "out": "h"}, + {"in": "χ", "out": "ʃ"}, + {"in": "a", "out": "æ"}, + {"in": "æ", "out": "æ"}, + {"in": "θ", "out": "θ"}, + {"in": "ᶿ", "out": ""}, + {"in": "ɛ", "out": "ɛ"}, + {"in": "ʌ", "out": "ʌ"}, + {"in": "ə", "out": "ə"}, + {"in": "ə", "out": "ə"}, + {"in": "ɡ", "out": "ɡ"}, + {"in": "h", "out": "h"}, + {"in": "i", "out": "i"}, + {"in": "ɣ", "out": "ɡ"}, + {"in": "k", "out": "k"}, + {"in": "l", "out": "l"}, + {"in": "ɬ", "out": "s"}, + {"in": "ɬ", "out": "s"}, + {"in": "m", "out": "m"}, + {"in": "n", "out": "n"}, + {"in": "ŋ", "out": "ŋ"}, + {"in": "o", "out": "ɔ"}, + {"in": "ɔ", "out": "ɔ"}, + {"in": "p", "out": "p"}, + {"in": "q", "out": "k"}, + {"in": "ɾ", "out": "ɾ"}, + {"in": "s", "out": "s"}, + {"in": "t", "out": "t"}, + {"in": "ʕ", "out": "ʒ"}, + {"in": "u", "out": "u"}, + {"in": "x", "out": "k"}, + {"in": "w", "out": "w"}, + {"in": "j", "out": "j"}, + {"in": "w", "out": "w"}, + {"in": "ʷ", "out": "w"}, + {"in": "ʡ", "out": "ʔ"}, + {"in": "ʔ", "out": "ʔ"}, + {"in": "ˀ", "out": ""} +] diff --git a/g2p/mappings/langs/sal/config-g2p.yaml b/g2p/mappings/langs/sal/config-g2p.yaml new file mode 100644 index 00000000..eaa0ab66 --- /dev/null +++ b/g2p/mappings/langs/sal/config-g2p.yaml @@ -0,0 +1,23 @@ +<<: &shared + language_name: Salishan languages with APA-based writing systems +mappings: + - display_name: Salishan APA to IPA + rules_path: sal_apa_to_ipa.csv + in_lang: sal-apa-equiv + out_lang: sal-ipa + authors: + - Tony Mattina + - Eric Joanis + type: mapping + prevent_feeding: true + rule_ordering: apply-longest-first + norm_form: NFD + <<: *shared + - display_name: Unicode Equivalencies + in_lang: sal-apa + out_lang: sal-apa-equiv + authors: + - Eric Joanis + rules_path: sal_equiv.csv + norm_form: NFD + <<: *shared diff --git a/g2p/mappings/langs/sal/sal_apa_to_ipa.csv b/g2p/mappings/langs/sal/sal_apa_to_ipa.csv new file mode 100644 index 00000000..a45a2aa3 --- /dev/null +++ b/g2p/mappings/langs/sal/sal_apa_to_ipa.csv @@ -0,0 +1,96 @@ +a,a,, +a',ˈa,, +á,ˈa,, +ạ,a̠,, +ạ́,ˈa̠,, +æ,æ,, +æ',ˈæ,, +c,ts,, +c̣,c̠,, +θ,θ,, +ᶿ,ᶿ,, +c',tsʼ,, +c̓,tsʼ,, +č,ʧ,, +č̓,ʧʼ,, +ǰ,ʤ,, +ɛ,ɛ,, +ɛ́,ˈɛ,, +ʌ,ʌ,, +e,ə,, +é,ˈə,, +ẹ́,ˈə̠,, +ə,ə,, +ə́,ˈə,, +ə̣́,ˈə̠,, +ʌ́,ˈʌ,, +g,ɡ,, +gʷ,ɡʷ,, +h,h,, +ḥ,h,, +ḥʷ,h̠ʷ,, +i,i,, +i',ˈi,, +ɣ,ɣ,, +ɣ',ɣˀ,, +k,k,, +k',kʼ,, +kʷ,kʷ,, +k'ʷ,kʼʷ,, +l,l,, +l',lˀ,, +ḷ,l̠,, +ɬ,ɬ,, +ł,ɬ,, +ƛ',tɬʼ,, +m,m,, +m',mˀ,, +n,n,, +n',nˀ,, +ṇ,n̠,, +ŋ,ŋ,, +o,o,, +o',ˈo,, +ó,ˈo,, +ɔ,ɔ,, +ɔ́,ˈɔ,, +p,p,, +p',pʼ,, +q,q,, +q',qʼ,, +qʷ,qʷ,, +q'ʷ,qʼʷ,, +r,ɾ,, +r',ɾˀ,, +s,s,, +ṣ,s̠,, +š,ʃ,, +t,t,, +t',tʼ,, +ť,tʼ,, +ʕ,ʕ,, +ʕ',ʕˀ,, +u,u,, +u',ˈu,, +ú,ˈu,, +ụ,u̠,, +ụ́,ˈu̠,, +ʕʷ,ʕʷ,, +ʕ'ʷ,ʕˀʷ,, +x̌,χ,, +x,x,, +x̣,x̠,, +x̣ʷ,x̠ʷ,, +w,w,, +y,j,, +x̌ʷ,χʷ,, +xʷ,xʷ,, +w',wˀ,, +w,w,, +y',jˀ,, +ʷ,ʷ,, +ʡ,ʡ,, +ʔ,ʔ,, +ˀ,ˀ,, +z',zˀ,, +z̓,zˀ,, diff --git a/g2p/mappings/langs/sal/sal_equiv.csv b/g2p/mappings/langs/sal/sal_equiv.csv new file mode 100644 index 00000000..ab926a91 --- /dev/null +++ b/g2p/mappings/langs/sal/sal_equiv.csv @@ -0,0 +1,4 @@ +’,' +‘,' +\u0315,' +\u0301{1}\u0323{2},\u0323{2}\u0301{1} diff --git a/g2p/tests/public/data/sal-arpabet.tsv b/g2p/tests/public/data/sal-arpabet.tsv new file mode 100644 index 00000000..97dd9b19 --- /dev/null +++ b/g2p/tests/public/data/sal-arpabet.tsv @@ -0,0 +1,105 @@ +# some more sample words +sal-apa eng-arpabet nə́c̕uʔ N AH T S UW HH +sal-apa eng-arpabet čə́saʔ CH AH S AE HH +sal-apa eng-arpabet ɬixʷ S IY K W +sal-apa eng-arpabet ŋús NG UW S +sal-apa eng-arpabet ɬq̕áčš S K AE CH SH +sal-apa eng-arpabet t̕χə́ŋ CH AH NG +sal-apa eng-arpabet c̕úʔkʷs T S UW HH K W S +sal-apa eng-arpabet táʔcs T AE HH T S S +sal-apa eng-arpabet tə́kʷxʷ T AH K W K W +sal-apa eng-arpabet ʔúpən HH UW P AH N +sal-apa eng-arpabet t̕ᶿáʔkʷəs T AE HH K W AH S +sal-apa eng-arpabet téʔθəs T AH HH TH AH S +sal-apa eng-arpabet nə́t̕ᶿəʔ N AH T AH HH +sal-apa eng-arpabet ɬəxʷɬšeʔ S AH K W S SH AH HH + +# Validate the impact of feeding +sal-apa eng-arpabet a AE +sal-apa eng-arpabet a' AE +sal-apa eng-arpabet ạ AE +sal-apa eng-arpabet ạ́ AE +sal-apa eng-arpabet æ AE +sal-apa eng-arpabet æ' AE +sal-apa eng-arpabet c T S +sal-apa eng-arpabet c̣ K +sal-apa eng-arpabet θ TH +sal-apa eng-arpabet c' T S +sal-apa eng-arpabet č CH +sal-apa eng-arpabet č̓ CH +sal-apa eng-arpabet ǰ JH +sal-apa eng-arpabet ɛ EH +sal-apa eng-arpabet ɛ́ EH +sal-apa eng-arpabet ʌ AH +sal-apa eng-arpabet ə AH +sal-apa eng-arpabet e AH +sal-apa eng-arpabet ə́ AH +sal-apa eng-arpabet ə̣́ AH +sal-apa eng-arpabet ʌ́ AH +sal-apa eng-arpabet g G +sal-apa eng-arpabet gʷ G W +sal-apa eng-arpabet h HH +sal-apa eng-arpabet ḥ HH +sal-apa eng-arpabet ḥʷ HH W +sal-apa eng-arpabet i IY +sal-apa eng-arpabet i' IY +sal-apa eng-arpabet ɣ G +sal-apa eng-arpabet ɣ' G +sal-apa eng-arpabet k K +sal-apa eng-arpabet k' K +sal-apa eng-arpabet kʷ K W +sal-apa eng-arpabet k'ʷ K W +sal-apa eng-arpabet l L +sal-apa eng-arpabet l' L +sal-apa eng-arpabet ḷ L +sal-apa eng-arpabet ɬ S +sal-apa eng-arpabet ł S +sal-apa eng-arpabet ƛ' T S +sal-apa eng-arpabet m M +sal-apa eng-arpabet m' M +sal-apa eng-arpabet n N +sal-apa eng-arpabet n' N +sal-apa eng-arpabet ṇ N +sal-apa eng-arpabet ŋ NG +sal-apa eng-arpabet o AO +sal-apa eng-arpabet o' AO +sal-apa eng-arpabet ó AO +sal-apa eng-arpabet ɔ AO +sal-apa eng-arpabet ɔ́ AO +sal-apa eng-arpabet p P +sal-apa eng-arpabet p' P +sal-apa eng-arpabet q K +sal-apa eng-arpabet q' K +sal-apa eng-arpabet qʷ K W +sal-apa eng-arpabet q'ʷ K W +sal-apa eng-arpabet r D +sal-apa eng-arpabet r' D +sal-apa eng-arpabet s S +sal-apa eng-arpabet ṣ S +sal-apa eng-arpabet t T +sal-apa eng-arpabet t' T +sal-apa eng-arpabet ť T +sal-apa eng-arpabet ʕ ZH +sal-apa eng-arpabet ʕ' ZH +sal-apa eng-arpabet u UW +sal-apa eng-arpabet u' UW +sal-apa eng-arpabet ú UW +sal-apa eng-arpabet ụ UW +sal-apa eng-arpabet ụ́ UW +sal-apa eng-arpabet ʕʷ ZH W +sal-apa eng-arpabet ʕ'ʷ ZH W +sal-apa eng-arpabet x̌ SH +sal-apa eng-arpabet x K +sal-apa eng-arpabet x̣ K +sal-apa eng-arpabet x̣ʷ K W +sal-apa eng-arpabet w W +sal-apa eng-arpabet y Y +sal-apa eng-arpabet x̌ʷ SH W +sal-apa eng-arpabet xʷ K W +sal-apa eng-arpabet w' W +sal-apa eng-arpabet w W +sal-apa eng-arpabet y' Y +sal-apa eng-arpabet ʷ W +sal-apa eng-arpabet ʡ HH +sal-apa eng-arpabet ʔ HH +sal-apa eng-arpabet z' Z diff --git a/g2p/tests/public/data/sal.tsv b/g2p/tests/public/data/sal.tsv new file mode 100644 index 00000000..ba6fa73d --- /dev/null +++ b/g2p/tests/public/data/sal.tsv @@ -0,0 +1,107 @@ +# some more sample words +sal-apa sal-ipa nə́c̕uʔ nˈətsʼuʔ clm one +sal-apa sal-ipa čə́saʔ ʧˈəsaʔ clm two +sal-apa sal-ipa ɬixʷ ɬixʷ clm three +sal-apa sal-ipa ŋús ŋˈus clm four +sal-apa sal-ipa ɬq̕áčš ɬqʼˈaʧʃ clm five +sal-apa sal-ipa t̕χə́ŋ tʼχˈəŋ clm six +sal-apa sal-ipa c̕úʔkʷs tsʼˈuʔkʷs clm seven +sal-apa sal-ipa táʔcs tˈaʔtss clm eight +sal-apa sal-ipa tə́kʷxʷ tˈəkʷxʷ clm nine +sal-apa sal-ipa ʔúpən ʔˈupən clm ten +sal-apa sal-ipa t̕ᶿáʔkʷəs tʼᶿˈaʔkʷəs Saanich seven +sal-apa sal-ipa téʔθəs tˈəʔθəs Saanich eight +sal-apa sal-ipa nə́t̕ᶿəʔ nˈətʼᶿəʔ Saanich one +sal-apa sal-ipa ɬəxʷɬšeʔ ɬəxʷɬʃəʔ Saanich thirty + +# Validate the impact of feeding +sal-apa sal-ipa a a +sal-apa sal-ipa a' ˈa +sal-apa sal-ipa ạ a̠ +sal-apa sal-ipa ạ́ ˈa̠ +sal-apa sal-ipa æ æ +sal-apa sal-ipa æ' ˈæ +sal-apa sal-ipa c ts +sal-apa sal-ipa c̣ c̠ +sal-apa sal-ipa θ θ +sal-apa sal-ipa c' tsʼ +sal-apa sal-ipa č ʧ +sal-apa sal-ipa č̓ ʧʼ +sal-apa sal-ipa ǰ ʤ +sal-apa sal-ipa ɛ ɛ +sal-apa sal-ipa ɛ́ ˈɛ +sal-apa sal-ipa ʌ ʌ +sal-apa sal-ipa ə ə +sal-apa sal-ipa e ə +sal-apa sal-ipa ə́ ˈə +sal-apa sal-ipa ə̣́ ˈə̠ +sal-apa sal-ipa ʌ́ ˈʌ +sal-apa sal-ipa g ɡ +sal-apa sal-ipa gʷ ɡʷ +sal-apa sal-ipa h h +sal-apa sal-ipa ḥ h +sal-apa sal-ipa ḥʷ h̠ʷ +sal-apa sal-ipa i i +sal-apa sal-ipa i' ˈi +sal-apa sal-ipa ɣ ɣ +sal-apa sal-ipa ɣ' ɣˀ +sal-apa sal-ipa k k +sal-apa sal-ipa k' kʼ +sal-apa sal-ipa kʷ kʷ +sal-apa sal-ipa k'ʷ kʼʷ +sal-apa sal-ipa l l +sal-apa sal-ipa l' lˀ +sal-apa sal-ipa ḷ l̠ +sal-apa sal-ipa ɬ ɬ +sal-apa sal-ipa ł ɬ +sal-apa sal-ipa ƛ' tɬʼ +sal-apa sal-ipa m m +sal-apa sal-ipa m' mˀ +sal-apa sal-ipa n n +sal-apa sal-ipa n' nˀ +sal-apa sal-ipa ṇ n̠ +sal-apa sal-ipa ŋ ŋ +sal-apa sal-ipa o o +sal-apa sal-ipa o' ˈo +sal-apa sal-ipa ó ˈo +sal-apa sal-ipa ɔ ɔ +sal-apa sal-ipa ɔ́ ˈɔ +sal-apa sal-ipa p p +sal-apa sal-ipa p' pʼ +sal-apa sal-ipa q q +sal-apa sal-ipa q' qʼ +sal-apa sal-ipa qʷ qʷ +sal-apa sal-ipa q'ʷ qʼʷ +sal-apa sal-ipa r ɾ +sal-apa sal-ipa r' ɾˀ +sal-apa sal-ipa s s +sal-apa sal-ipa ṣ s̠ +sal-apa sal-ipa t t +sal-apa sal-ipa t' tʼ +sal-apa sal-ipa ť tʼ +sal-apa sal-ipa ʕ ʕ +sal-apa sal-ipa ʕ' ʕˀ +sal-apa sal-ipa u u +sal-apa sal-ipa u' ˈu +sal-apa sal-ipa ú ˈu +sal-apa sal-ipa ụ u̠ +sal-apa sal-ipa ụ́ ˈu̠ +sal-apa sal-ipa ʕʷ ʕʷ +sal-apa sal-ipa ʕ'ʷ ʕˀʷ +sal-apa sal-ipa x̌ χ +sal-apa sal-ipa x x +sal-apa sal-ipa x̣ x̠ +sal-apa sal-ipa x̣ʷ x̠ʷ +sal-apa sal-ipa w w +sal-apa sal-ipa y j +sal-apa sal-ipa x̌ʷ χʷ +sal-apa sal-ipa xʷ xʷ +sal-apa sal-ipa w' wˀ +sal-apa sal-ipa w w +sal-apa sal-ipa y' jˀ +sal-apa sal-ipa ʷ ʷ +sal-apa sal-ipa ʡ ʡ +sal-apa sal-ipa ʔ ʔ +sal-apa sal-ipa ˀ ˀ +sal-apa sal-ipa z' zˀ +sal-apa sal-ipa z̓ zˀ