From 7909e6ea07816d54b39519ab033b154a55bf6e6a Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Fri, 19 Apr 2024 15:02:01 -0400 Subject: [PATCH] feat: Add sal-apa generic mapping for APA-based Salish writing systems Collaborative work with Tony Mattina, who provided the list of characters and their mappings, as well as the sample words used for testing, with permission to publish. Co-authored-by: tonymattina <121320595+tonymattina@users.noreply.github.com> --- g2p/mappings/langs/generated/config-g2p.yaml | 95 +++++++++++++++- .../langs/generated/sal-ipa_to_eng-ipa.json | 94 +++++++++++++++ g2p/mappings/langs/sal/config-g2p.yaml | 23 ++++ g2p/mappings/langs/sal/sal_apa_to_ipa.csv | 96 ++++++++++++++++ g2p/mappings/langs/sal/sal_equiv.csv | 4 + g2p/tests/public/data/sal-arpabet.tsv | 105 +++++++++++++++++ g2p/tests/public/data/sal.tsv | 107 ++++++++++++++++++ 7 files changed, 523 insertions(+), 1 deletion(-) create mode 100644 g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json create mode 100644 g2p/mappings/langs/sal/config-g2p.yaml create mode 100644 g2p/mappings/langs/sal/sal_apa_to_ipa.csv create mode 100644 g2p/mappings/langs/sal/sal_equiv.csv create mode 100644 g2p/tests/public/data/sal-arpabet.tsv create mode 100644 g2p/tests/public/data/sal.tsv diff --git a/g2p/mappings/langs/generated/config-g2p.yaml b/g2p/mappings/langs/generated/config-g2p.yaml index 1cfb893d..fcd91c54 100644 --- a/g2p/mappings/langs/generated/config-g2p.yaml +++ b/g2p/mappings/langs/generated/config-g2p.yaml @@ -1,5 +1,6 @@ mappings: - - case_sensitive: true + - case_equivalencies: {} + case_sensitive: true display_name: Atikamekw IPA to English IPA escape_special: false in_lang: atj-ipa @@ -7,6 +8,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -14,6 +16,7 @@ mappings: - author: Generated 2019-09-26 13:48:19.852180 authors: - Generated 2020-07-06 16:39:58.770641 + case_equivalencies: {} case_sensitive: true display_name: Danish IPA to English IPA escape_special: false @@ -22,6 +25,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -30,6 +34,7 @@ mappings: authors: - Generated 2021-04-14 16:34:39.868437 - Tidied up 2021-04-14 by Eric Joanis + case_equivalencies: {} case_sensitive: true display_name: French IPA to English IPA escape_special: false @@ -38,6 +43,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -45,6 +51,7 @@ mappings: - author: Generated 2019-10-24 16:21:41.605592 authors: - Generated 2022-04-12 11:26:30.098188 + case_equivalencies: {} case_sensitive: true display_name: SENĆOŦEN IPA to English IPA escape_special: false @@ -53,6 +60,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first @@ -60,6 +68,7 @@ mappings: - author: Generated 2019-10-24 16:21:41.605592 authors: - Generated 2022-04-12 11:26:30.098188 + case_equivalencies: {} case_sensitive: true display_name: SENĆOŦEN IPA to English IPA escape_special: false @@ -68,11 +77,13 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: str-ipa_to_hamming-eng-ipa.json - author: Generated 2019-10-23 11:18:26.278696 + case_equivalencies: {} case_sensitive: true display_name: Algonquin IPA to English IPA escape_special: false @@ -81,12 +92,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: alq-ipa_to_eng-ipa.json - authors: - Generated 2020-03-06 00:15:29.676518 + case_equivalencies: {} case_sensitive: true display_name: see-ipa IPA to eng-ipa IPA escape_special: false @@ -95,12 +108,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: see-ipa_to_eng-ipa.json - authors: - Generated 2020-03-30 13:17:41.062858 + case_equivalencies: {} case_sensitive: true display_name: lml-ipa IPA to eng-ipa IPA escape_special: false @@ -109,12 +124,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: lml-ipa_to_eng-ipa.json - authors: - Generated 2020-04-27 01:30:32.371649 + case_equivalencies: {} case_sensitive: false display_name: oji-ipa IPA to eng-ipa IPA escape_special: false @@ -123,12 +140,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written rules_path: oji-ipa_to_eng-ipa.json - authors: - Generated 2020-07-28 14:28:24.895117 + case_equivalencies: {} case_sensitive: false display_name: gla-ipa IPA to eng-ipa IPA escape_special: false @@ -137,12 +156,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: gla-ipa_to_eng-ipa.json - authors: - Generated 2020-08-06 16:06:34.805350 + case_equivalencies: {} case_sensitive: true display_name: crk-ipa IPA to eng-ipa IPA escape_special: false @@ -151,12 +172,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: crk-ipa_to_eng-ipa.json - authors: - Generated 2021-03-07 22:23:34.538427 + case_equivalencies: {} case_sensitive: false display_name: tce-ipa IPA to eng-ipa IPA escape_special: false @@ -165,12 +188,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: tce-ipa_to_eng-ipa.json - authors: - Generated 2023-10-04 19:33:36.891594 + case_equivalencies: {} case_sensitive: false display_name: crg-ipa IPA to eng-ipa IPA escape_special: false @@ -179,6 +204,7 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written @@ -187,6 +213,7 @@ mappings: - Generated 2020-09-18 10:40:15.289530 - Updated by hand 2021-02-10 Eric Joanis to handle ejectives - Update by hand 2021-05-21 Shankhalika Srikanth reorder rules with aspiration + case_equivalencies: {} case_sensitive: false display_name: Tlingit IPA to English IPA escape_special: false @@ -195,12 +222,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written rules_path: tli-ipa_to_eng-ipa.json - authors: - Generated 2020-11-30 22:36:15.639360 + case_equivalencies: {} case_sensitive: false display_name: Gwich'in IPA to English IPA escape_special: false @@ -209,12 +238,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: gwi-ipa_to_eng-ipa.json - authors: - Generated 2022-09-09 13:04:46.128081 + case_equivalencies: {} case_sensitive: false display_name: mic-ipa IPA to eng-ipa IPA escape_special: false @@ -223,12 +254,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: as-written rules_path: mic-ipa_to_eng-ipa.json - authors: - Generated 2021-03-08 17:26:16.371203 + case_equivalencies: {} case_sensitive: true display_name: iku-ipa IPA to eng-ipa IPA escape_special: false @@ -237,12 +270,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: iku-ipa_to_eng-ipa.json - authors: - Generated 2021-03-18 14:51:53.222666 + case_equivalencies: {} case_sensitive: false display_name: ikt-ipa IPA to eng-ipa IPA escape_special: false @@ -251,12 +286,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: ikt-ipa_to_eng-ipa.json - authors: - Generated 2021-03-18 16:13:29.007425 + case_equivalencies: {} case_sensitive: false display_name: iku-sro-ipa IPA to eng-ipa IPA escape_special: false @@ -265,12 +302,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: iku-sro-ipa_to_eng-ipa.json - authors: - Generated 2021-03-23 11:55:07.200851 + case_equivalencies: {} case_sensitive: false display_name: haa-ipa IPA to eng-ipa IPA escape_special: false @@ -279,12 +318,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: haa-ipa_to_eng-ipa.json - authors: - Generated 2021-03-07 23:35:39.198455 + case_equivalencies: {} case_sensitive: false display_name: ttm-ipa IPA to eng-ipa IPA escape_special: false @@ -293,12 +334,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: ttm-ipa_to_eng-ipa.json - authors: - Generated 2021-08-01 19:37:38.316339 + case_equivalencies: {} case_sensitive: false display_name: tau-ipa IPA to eng-ipa IPA escape_special: false @@ -307,12 +350,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: tau-ipa_to_eng-ipa.json - authors: - Generated 2023-08-15 12:26:00.528774 + case_equivalencies: {} case_sensitive: false display_name: moh-ipa IPA to eng-ipa IPA escape_special: false @@ -321,12 +366,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: moh-ipa_to_eng-ipa.json - authors: - Generated 2022-04-12 11:42:52.894700 + case_equivalencies: {} case_sensitive: false display_name: moh-ipa IPA to eng-ipa IPA escape_special: false @@ -335,12 +382,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: moh-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-04-11 16:24:52.967497 + case_equivalencies: {} case_sensitive: true display_name: moh-equiv dummy to dummy dummy escape_special: false @@ -349,12 +398,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: moh-equiv_to_dummy.json - authors: - Generated 2022-04-11 16:24:52.967497 + case_equivalencies: {} case_sensitive: true display_name: moh-equiv dummy to hamming dummy escape_special: false @@ -363,12 +414,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: moh-equiv_to_hamming-dummy.json - authors: - Generated 2022-04-12 10:47:28.528081 + case_equivalencies: {} case_sensitive: true display_name: str-equiv dummy to dummy dummy escape_special: false @@ -377,12 +430,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: str-equiv_to_dummy.json - authors: - Generated 2022-04-12 10:47:28.528081 + case_equivalencies: {} case_sensitive: true display_name: str-equiv dummy to hamming dummy escape_special: false @@ -391,12 +446,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: apply-longest-first rules_path: str-equiv_to_hamming-dummy.json - authors: - Generated 2022-04-12 10:57:49.409626 + case_equivalencies: {} case_sensitive: true display_name: und-ascii dummy to dummy dummy escape_special: false @@ -405,12 +462,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: und-ascii_to_dummy.json - authors: - Generated 2022-04-12 10:56:44.604694 + case_equivalencies: {} case_sensitive: true display_name: und-ascii dummy to dummy dummy escape_special: false @@ -419,12 +478,14 @@ mappings: norm_form: NFD out_delimiter: '' out_lang: hamming-dummy + preserve_case: false prevent_feeding: false reverse: false rule_ordering: as-written rules_path: und-ascii_to_hamming-dummy.json - authors: - Generated 2022-05-20 16:36:31.566493 + case_equivalencies: {} case_sensitive: true display_name: iku-ipa to hamming-eng-ipa escape_special: false @@ -433,12 +494,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: iku-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-05-20 16:55:13.188319 + case_equivalencies: {} case_sensitive: true display_name: und-ipa to hamming-eng-ipa escape_special: false @@ -447,12 +510,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: und-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-05-20 19:12:51.416268 + case_equivalencies: {} case_sensitive: true display_name: ikt-ipa to hamming-eng-ipa escape_special: false @@ -461,12 +526,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: hamming-eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: ikt-ipa_to_hamming-eng-ipa.json - authors: - Generated 2022-06-07 10:24:54.512807 + case_equivalencies: {} case_sensitive: true display_name: moe-ipa to eng-ipa escape_special: false @@ -475,12 +542,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: moe-ipa_to_eng-ipa.json - authors: - Generated 2022-07-22 13:54:50.454043 + case_equivalencies: {} case_sensitive: false display_name: win-ipa IPA to eng-ipa IPA escape_special: false @@ -489,6 +558,7 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first @@ -496,6 +566,7 @@ mappings: - authors: - Generated 2022-07-20 06:53:42.624015 - Updated by hand 2021-07-20 David Huggins-Daines to improve vowels + case_equivalencies: {} case_sensitive: false display_name: fin-ipa IPA to eng-ipa IPA escape_special: false @@ -504,12 +575,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: fin-ipa_to_eng-ipa.json - authors: - Generated 2023-01-13 11:12:51.451010 + case_equivalencies: {} case_sensitive: true display_name: oka-ipa to eng-ipa escape_special: false @@ -518,12 +591,14 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: oka-ipa_to_eng-ipa.json - authors: - Generated 2023-06-26 15:06:13.793560 + case_equivalencies: {} case_sensitive: true display_name: clm-ipa to eng-ipa escape_special: false @@ -532,7 +607,25 @@ mappings: norm_form: NFC out_delimiter: '' out_lang: eng-ipa + preserve_case: false prevent_feeding: true reverse: false rule_ordering: apply-longest-first rules_path: clm-ipa_to_eng-ipa.json + - authors: + - Generated 2024-04-18 13:30:09.143401 + - Manual fixes by Eric Joanis + case_equivalencies: {} + case_sensitive: true + display_name: sal-ipa to eng-ipa + escape_special: false + in_lang: sal-ipa + language_name: IPA + norm_form: NFC + out_delimiter: '' + out_lang: eng-ipa + preserve_case: false + prevent_feeding: true + reverse: false + rule_ordering: apply-longest-first + rules_path: sal-ipa_to_eng-ipa.json diff --git a/g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json b/g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json new file mode 100644 index 00000000..69cfb768 --- /dev/null +++ b/g2p/mappings/langs/generated/sal-ipa_to_eng-ipa.json @@ -0,0 +1,94 @@ +[ + {"in": "ˈa̠", "out": "æ"}, + {"in": "ˈə̠", "out": "ə"}, + {"in": "h̠ʷ", "out": "hw"}, + {"in": "kʼʷ", "out": "kw"}, + {"in": "qʼʷ", "out": "kw"}, + {"in": "ˈu̠", "out": "u"}, + {"in": "ʕˀʷ", "out": "ʒw"}, + {"in": "x̠ʷ", "out": "kw"}, + {"in": "tsʼ", "out": "ts"}, + {"in": "tsʼ", "out": "ts"}, + {"in": "tɬʼ", "out": "ts"}, + {"in": "ʧʼ", "out": "tʃ"}, + {"in": "χʷ", "out": "ʃw"}, + {"in": "ˈa", "out": "æ"}, + {"in": "a̠", "out": "æ"}, + {"in": "ˈæ", "out": "æ"}, + {"in": "c̠", "out": "k"}, + {"in": "ˈɛ", "out": "ɛ"}, + {"in": "ˈə", "out": "ə"}, + {"in": "ˈʌ", "out": "ʌ"}, + {"in": "ɡʷ", "out": "ɡw"}, + {"in": "ˈi", "out": "i"}, + {"in": "ɣˀ", "out": "ɡ"}, + {"in": "kʼ", "out": "k"}, + {"in": "kʷ", "out": "kw"}, + {"in": "lˀ", "out": "l"}, + {"in": "l̠", "out": "l"}, + {"in": "mˀ", "out": "m"}, + {"in": "nˀ", "out": "n"}, + {"in": "n̠", "out": "n"}, + {"in": "ˈo", "out": "ɔ"}, + {"in": "ˈo", "out": "ɔ"}, + {"in": "ˈɔ", "out": "ɔ"}, + {"in": "pʼ", "out": "p"}, + {"in": "qʼ", "out": "k"}, + {"in": "qʷ", "out": "kw"}, + {"in": "ɾˀ", "out": "ɾ"}, + {"in": "s̠", "out": "s"}, + {"in": "tʼ", "out": "t"}, + {"in": "tʼ", "out": "t"}, + {"in": "ʕˀ", "out": "ʒ"}, + {"in": "ˈu", "out": "u"}, + {"in": "ˈu", "out": "u"}, + {"in": "u̠", "out": "u"}, + {"in": "ʕʷ", "out": "ʒw"}, + {"in": "x̠", "out": "k"}, + {"in": "xʷ", "out": "kw"}, + {"in": "wˀ", "out": "w"}, + {"in": "jˀ", "out": "j"}, + {"in": "zˀ", "out": "z"}, + {"in": "zˀ", "out": "z"}, + {"in": "ts", "out": "ts"}, + {"in": "ʧ", "out": "tʃ"}, + {"in": "ʤ", "out": "dʒ"}, + {"in": "h", "out": "h"}, + {"in": "χ", "out": "ʃ"}, + {"in": "a", "out": "æ"}, + {"in": "æ", "out": "æ"}, + {"in": "θ", "out": "θ"}, + {"in": "ᶿ", "out": ""}, + {"in": "ɛ", "out": "ɛ"}, + {"in": "ʌ", "out": "ʌ"}, + {"in": "ə", "out": "ə"}, + {"in": "ə", "out": "ə"}, + {"in": "ɡ", "out": "ɡ"}, + {"in": "h", "out": "h"}, + {"in": "i", "out": "i"}, + {"in": "ɣ", "out": "ɡ"}, + {"in": "k", "out": "k"}, + {"in": "l", "out": "l"}, + {"in": "ɬ", "out": "s"}, + {"in": "ɬ", "out": "s"}, + {"in": "m", "out": "m"}, + {"in": "n", "out": "n"}, + {"in": "ŋ", "out": "ŋ"}, + {"in": "o", "out": "ɔ"}, + {"in": "ɔ", "out": "ɔ"}, + {"in": "p", "out": "p"}, + {"in": "q", "out": "k"}, + {"in": "ɾ", "out": "ɾ"}, + {"in": "s", "out": "s"}, + {"in": "t", "out": "t"}, + {"in": "ʕ", "out": "ʒ"}, + {"in": "u", "out": "u"}, + {"in": "x", "out": "k"}, + {"in": "w", "out": "w"}, + {"in": "j", "out": "j"}, + {"in": "w", "out": "w"}, + {"in": "ʷ", "out": "w"}, + {"in": "ʡ", "out": "ʔ"}, + {"in": "ʔ", "out": "ʔ"}, + {"in": "ˀ", "out": ""} +] diff --git a/g2p/mappings/langs/sal/config-g2p.yaml b/g2p/mappings/langs/sal/config-g2p.yaml new file mode 100644 index 00000000..eaa0ab66 --- /dev/null +++ b/g2p/mappings/langs/sal/config-g2p.yaml @@ -0,0 +1,23 @@ +<<: &shared + language_name: Salishan languages with APA-based writing systems +mappings: + - display_name: Salishan APA to IPA + rules_path: sal_apa_to_ipa.csv + in_lang: sal-apa-equiv + out_lang: sal-ipa + authors: + - Tony Mattina + - Eric Joanis + type: mapping + prevent_feeding: true + rule_ordering: apply-longest-first + norm_form: NFD + <<: *shared + - display_name: Unicode Equivalencies + in_lang: sal-apa + out_lang: sal-apa-equiv + authors: + - Eric Joanis + rules_path: sal_equiv.csv + norm_form: NFD + <<: *shared diff --git a/g2p/mappings/langs/sal/sal_apa_to_ipa.csv b/g2p/mappings/langs/sal/sal_apa_to_ipa.csv new file mode 100644 index 00000000..a45a2aa3 --- /dev/null +++ b/g2p/mappings/langs/sal/sal_apa_to_ipa.csv @@ -0,0 +1,96 @@ +a,a,, +a',ˈa,, +á,ˈa,, +ạ,a̠,, +ạ́,ˈa̠,, +æ,æ,, +æ',ˈæ,, +c,ts,, +c̣,c̠,, +θ,θ,, +ᶿ,ᶿ,, +c',tsʼ,, +c̓,tsʼ,, +č,ʧ,, +č̓,ʧʼ,, +ǰ,ʤ,, +ɛ,ɛ,, +ɛ́,ˈɛ,, +ʌ,ʌ,, +e,ə,, +é,ˈə,, +ẹ́,ˈə̠,, +ə,ə,, +ə́,ˈə,, +ə̣́,ˈə̠,, +ʌ́,ˈʌ,, +g,ɡ,, +gʷ,ɡʷ,, +h,h,, +ḥ,h,, +ḥʷ,h̠ʷ,, +i,i,, +i',ˈi,, +ɣ,ɣ,, +ɣ',ɣˀ,, +k,k,, +k',kʼ,, +kʷ,kʷ,, +k'ʷ,kʼʷ,, +l,l,, +l',lˀ,, +ḷ,l̠,, +ɬ,ɬ,, +ł,ɬ,, +ƛ',tɬʼ,, +m,m,, +m',mˀ,, +n,n,, +n',nˀ,, +ṇ,n̠,, +ŋ,ŋ,, +o,o,, +o',ˈo,, +ó,ˈo,, +ɔ,ɔ,, +ɔ́,ˈɔ,, +p,p,, +p',pʼ,, +q,q,, +q',qʼ,, +qʷ,qʷ,, +q'ʷ,qʼʷ,, +r,ɾ,, +r',ɾˀ,, +s,s,, +ṣ,s̠,, +š,ʃ,, +t,t,, +t',tʼ,, +ť,tʼ,, +ʕ,ʕ,, +ʕ',ʕˀ,, +u,u,, +u',ˈu,, +ú,ˈu,, +ụ,u̠,, +ụ́,ˈu̠,, +ʕʷ,ʕʷ,, +ʕ'ʷ,ʕˀʷ,, +x̌,χ,, +x,x,, +x̣,x̠,, +x̣ʷ,x̠ʷ,, +w,w,, +y,j,, +x̌ʷ,χʷ,, +xʷ,xʷ,, +w',wˀ,, +w,w,, +y',jˀ,, +ʷ,ʷ,, +ʡ,ʡ,, +ʔ,ʔ,, +ˀ,ˀ,, +z',zˀ,, +z̓,zˀ,, diff --git a/g2p/mappings/langs/sal/sal_equiv.csv b/g2p/mappings/langs/sal/sal_equiv.csv new file mode 100644 index 00000000..ab926a91 --- /dev/null +++ b/g2p/mappings/langs/sal/sal_equiv.csv @@ -0,0 +1,4 @@ +’,' +‘,' +\u0315,' +\u0301{1}\u0323{2},\u0323{2}\u0301{1} diff --git a/g2p/tests/public/data/sal-arpabet.tsv b/g2p/tests/public/data/sal-arpabet.tsv new file mode 100644 index 00000000..97dd9b19 --- /dev/null +++ b/g2p/tests/public/data/sal-arpabet.tsv @@ -0,0 +1,105 @@ +# some more sample words +sal-apa eng-arpabet nə́c̕uʔ N AH T S UW HH +sal-apa eng-arpabet čə́saʔ CH AH S AE HH +sal-apa eng-arpabet ɬixʷ S IY K W +sal-apa eng-arpabet ŋús NG UW S +sal-apa eng-arpabet ɬq̕áčš S K AE CH SH +sal-apa eng-arpabet t̕χə́ŋ CH AH NG +sal-apa eng-arpabet c̕úʔkʷs T S UW HH K W S +sal-apa eng-arpabet táʔcs T AE HH T S S +sal-apa eng-arpabet tə́kʷxʷ T AH K W K W +sal-apa eng-arpabet ʔúpən HH UW P AH N +sal-apa eng-arpabet t̕ᶿáʔkʷəs T AE HH K W AH S +sal-apa eng-arpabet téʔθəs T AH HH TH AH S +sal-apa eng-arpabet nə́t̕ᶿəʔ N AH T AH HH +sal-apa eng-arpabet ɬəxʷɬšeʔ S AH K W S SH AH HH + +# Validate the impact of feeding +sal-apa eng-arpabet a AE +sal-apa eng-arpabet a' AE +sal-apa eng-arpabet ạ AE +sal-apa eng-arpabet ạ́ AE +sal-apa eng-arpabet æ AE +sal-apa eng-arpabet æ' AE +sal-apa eng-arpabet c T S +sal-apa eng-arpabet c̣ K +sal-apa eng-arpabet θ TH +sal-apa eng-arpabet c' T S +sal-apa eng-arpabet č CH +sal-apa eng-arpabet č̓ CH +sal-apa eng-arpabet ǰ JH +sal-apa eng-arpabet ɛ EH +sal-apa eng-arpabet ɛ́ EH +sal-apa eng-arpabet ʌ AH +sal-apa eng-arpabet ə AH +sal-apa eng-arpabet e AH +sal-apa eng-arpabet ə́ AH +sal-apa eng-arpabet ə̣́ AH +sal-apa eng-arpabet ʌ́ AH +sal-apa eng-arpabet g G +sal-apa eng-arpabet gʷ G W +sal-apa eng-arpabet h HH +sal-apa eng-arpabet ḥ HH +sal-apa eng-arpabet ḥʷ HH W +sal-apa eng-arpabet i IY +sal-apa eng-arpabet i' IY +sal-apa eng-arpabet ɣ G +sal-apa eng-arpabet ɣ' G +sal-apa eng-arpabet k K +sal-apa eng-arpabet k' K +sal-apa eng-arpabet kʷ K W +sal-apa eng-arpabet k'ʷ K W +sal-apa eng-arpabet l L +sal-apa eng-arpabet l' L +sal-apa eng-arpabet ḷ L +sal-apa eng-arpabet ɬ S +sal-apa eng-arpabet ł S +sal-apa eng-arpabet ƛ' T S +sal-apa eng-arpabet m M +sal-apa eng-arpabet m' M +sal-apa eng-arpabet n N +sal-apa eng-arpabet n' N +sal-apa eng-arpabet ṇ N +sal-apa eng-arpabet ŋ NG +sal-apa eng-arpabet o AO +sal-apa eng-arpabet o' AO +sal-apa eng-arpabet ó AO +sal-apa eng-arpabet ɔ AO +sal-apa eng-arpabet ɔ́ AO +sal-apa eng-arpabet p P +sal-apa eng-arpabet p' P +sal-apa eng-arpabet q K +sal-apa eng-arpabet q' K +sal-apa eng-arpabet qʷ K W +sal-apa eng-arpabet q'ʷ K W +sal-apa eng-arpabet r D +sal-apa eng-arpabet r' D +sal-apa eng-arpabet s S +sal-apa eng-arpabet ṣ S +sal-apa eng-arpabet t T +sal-apa eng-arpabet t' T +sal-apa eng-arpabet ť T +sal-apa eng-arpabet ʕ ZH +sal-apa eng-arpabet ʕ' ZH +sal-apa eng-arpabet u UW +sal-apa eng-arpabet u' UW +sal-apa eng-arpabet ú UW +sal-apa eng-arpabet ụ UW +sal-apa eng-arpabet ụ́ UW +sal-apa eng-arpabet ʕʷ ZH W +sal-apa eng-arpabet ʕ'ʷ ZH W +sal-apa eng-arpabet x̌ SH +sal-apa eng-arpabet x K +sal-apa eng-arpabet x̣ K +sal-apa eng-arpabet x̣ʷ K W +sal-apa eng-arpabet w W +sal-apa eng-arpabet y Y +sal-apa eng-arpabet x̌ʷ SH W +sal-apa eng-arpabet xʷ K W +sal-apa eng-arpabet w' W +sal-apa eng-arpabet w W +sal-apa eng-arpabet y' Y +sal-apa eng-arpabet ʷ W +sal-apa eng-arpabet ʡ HH +sal-apa eng-arpabet ʔ HH +sal-apa eng-arpabet z' Z diff --git a/g2p/tests/public/data/sal.tsv b/g2p/tests/public/data/sal.tsv new file mode 100644 index 00000000..ba6fa73d --- /dev/null +++ b/g2p/tests/public/data/sal.tsv @@ -0,0 +1,107 @@ +# some more sample words +sal-apa sal-ipa nə́c̕uʔ nˈətsʼuʔ clm one +sal-apa sal-ipa čə́saʔ ʧˈəsaʔ clm two +sal-apa sal-ipa ɬixʷ ɬixʷ clm three +sal-apa sal-ipa ŋús ŋˈus clm four +sal-apa sal-ipa ɬq̕áčš ɬqʼˈaʧʃ clm five +sal-apa sal-ipa t̕χə́ŋ tʼχˈəŋ clm six +sal-apa sal-ipa c̕úʔkʷs tsʼˈuʔkʷs clm seven +sal-apa sal-ipa táʔcs tˈaʔtss clm eight +sal-apa sal-ipa tə́kʷxʷ tˈəkʷxʷ clm nine +sal-apa sal-ipa ʔúpən ʔˈupən clm ten +sal-apa sal-ipa t̕ᶿáʔkʷəs tʼᶿˈaʔkʷəs Saanich seven +sal-apa sal-ipa téʔθəs tˈəʔθəs Saanich eight +sal-apa sal-ipa nə́t̕ᶿəʔ nˈətʼᶿəʔ Saanich one +sal-apa sal-ipa ɬəxʷɬšeʔ ɬəxʷɬʃəʔ Saanich thirty + +# Validate the impact of feeding +sal-apa sal-ipa a a +sal-apa sal-ipa a' ˈa +sal-apa sal-ipa ạ a̠ +sal-apa sal-ipa ạ́ ˈa̠ +sal-apa sal-ipa æ æ +sal-apa sal-ipa æ' ˈæ +sal-apa sal-ipa c ts +sal-apa sal-ipa c̣ c̠ +sal-apa sal-ipa θ θ +sal-apa sal-ipa c' tsʼ +sal-apa sal-ipa č ʧ +sal-apa sal-ipa č̓ ʧʼ +sal-apa sal-ipa ǰ ʤ +sal-apa sal-ipa ɛ ɛ +sal-apa sal-ipa ɛ́ ˈɛ +sal-apa sal-ipa ʌ ʌ +sal-apa sal-ipa ə ə +sal-apa sal-ipa e ə +sal-apa sal-ipa ə́ ˈə +sal-apa sal-ipa ə̣́ ˈə̠ +sal-apa sal-ipa ʌ́ ˈʌ +sal-apa sal-ipa g ɡ +sal-apa sal-ipa gʷ ɡʷ +sal-apa sal-ipa h h +sal-apa sal-ipa ḥ h +sal-apa sal-ipa ḥʷ h̠ʷ +sal-apa sal-ipa i i +sal-apa sal-ipa i' ˈi +sal-apa sal-ipa ɣ ɣ +sal-apa sal-ipa ɣ' ɣˀ +sal-apa sal-ipa k k +sal-apa sal-ipa k' kʼ +sal-apa sal-ipa kʷ kʷ +sal-apa sal-ipa k'ʷ kʼʷ +sal-apa sal-ipa l l +sal-apa sal-ipa l' lˀ +sal-apa sal-ipa ḷ l̠ +sal-apa sal-ipa ɬ ɬ +sal-apa sal-ipa ł ɬ +sal-apa sal-ipa ƛ' tɬʼ +sal-apa sal-ipa m m +sal-apa sal-ipa m' mˀ +sal-apa sal-ipa n n +sal-apa sal-ipa n' nˀ +sal-apa sal-ipa ṇ n̠ +sal-apa sal-ipa ŋ ŋ +sal-apa sal-ipa o o +sal-apa sal-ipa o' ˈo +sal-apa sal-ipa ó ˈo +sal-apa sal-ipa ɔ ɔ +sal-apa sal-ipa ɔ́ ˈɔ +sal-apa sal-ipa p p +sal-apa sal-ipa p' pʼ +sal-apa sal-ipa q q +sal-apa sal-ipa q' qʼ +sal-apa sal-ipa qʷ qʷ +sal-apa sal-ipa q'ʷ qʼʷ +sal-apa sal-ipa r ɾ +sal-apa sal-ipa r' ɾˀ +sal-apa sal-ipa s s +sal-apa sal-ipa ṣ s̠ +sal-apa sal-ipa t t +sal-apa sal-ipa t' tʼ +sal-apa sal-ipa ť tʼ +sal-apa sal-ipa ʕ ʕ +sal-apa sal-ipa ʕ' ʕˀ +sal-apa sal-ipa u u +sal-apa sal-ipa u' ˈu +sal-apa sal-ipa ú ˈu +sal-apa sal-ipa ụ u̠ +sal-apa sal-ipa ụ́ ˈu̠ +sal-apa sal-ipa ʕʷ ʕʷ +sal-apa sal-ipa ʕ'ʷ ʕˀʷ +sal-apa sal-ipa x̌ χ +sal-apa sal-ipa x x +sal-apa sal-ipa x̣ x̠ +sal-apa sal-ipa x̣ʷ x̠ʷ +sal-apa sal-ipa w w +sal-apa sal-ipa y j +sal-apa sal-ipa x̌ʷ χʷ +sal-apa sal-ipa xʷ xʷ +sal-apa sal-ipa w' wˀ +sal-apa sal-ipa w w +sal-apa sal-ipa y' jˀ +sal-apa sal-ipa ʷ ʷ +sal-apa sal-ipa ʡ ʡ +sal-apa sal-ipa ʔ ʔ +sal-apa sal-ipa ˀ ˀ +sal-apa sal-ipa z' zˀ +sal-apa sal-ipa z̓ zˀ