From a76effb418aac183f2f7f77b329c975ad1aaecd4 Mon Sep 17 00:00:00 2001 From: Kylie McClain Date: Mon, 29 Aug 2022 11:43:48 -0400 Subject: [PATCH] o ante e nasin pali tawa nasin sin - o kepeken nimi Linku - o wan e ijo tan nimi Linku e ijo tan pali ni --- .gitignore | 3 + Makefile | 37 +- README.adoc | 2 +- README.en.adoc | 2 +- augment.json | 650 ++++++++++++++++++++++++++++++++++ augment_languages.json | 80 +++++ augment_names.json | 14 + augment_places.json | 179 ++++++++++ augment_transliterations.json | 9 + filter_linku.jq | 62 ++++ generate_dic.jq | 51 +++ tok.aff | 33 +- tok.dic | 125 ------- 13 files changed, 1114 insertions(+), 133 deletions(-) create mode 100644 augment.json create mode 100644 augment_languages.json create mode 100644 augment_names.json create mode 100644 augment_places.json create mode 100644 augment_transliterations.json create mode 100755 filter_linku.jq create mode 100755 generate_dic.jq delete mode 100644 tok.dic diff --git a/.gitignore b/.gitignore index 1c25f3a..faa62df 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ +data.json tok.bdic +tok.dic +hunspell-tok-*.tar.gz diff --git a/Makefile b/Makefile index ca852f0..4bcf3cd 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,48 @@ +.DELETE_ON_ERROR: + prefix ?= /usr/local datadir ?= ${prefix}/share dictdir ?= ${datadir}/hunspell bdicdir ?= ${datadir}/qt/qtwebengine_dictionaries -all: +linku ?= https://lipu-linku.github.io/jasima/data.json + +dev: data.json all + +all: tok.dic + +data.json: filter_linku.jq +data.json: augment.json augment_languages.json augment_places.json augment_names.json augment_transliterations.json + curl -Lf ${linku} \ + | jq -s '.[0] * .[1] * .[2] * .[3] * .[4] * .[5]' \ + - \ + ./augment.json \ + ./augment_places.json \ + ./augment_languages.json \ + ./augment_names.json \ + ./augment_transliterations.json \ + | jq -cf ./filter_linku.jq > data.json + +tok.dic: data.json generate_dic.jq + jq -rf ./generate_dic.jq < data.json > tok.dic tok.bdic: tok.aff tok.dic qwebengine_convert_dict tok.aff tok.bdic 
-bdic: tok.bdic +dist: clean dev + tag=$$(git rev-list --count --since=yesterday --until=tomorrow HEAD); \ + [ "$$tag" -gt 1 ] && tag=".$$tag" || tag=; \ + tag="$$(date +%Y%m%d)$$tag"; \ + git tag -fs "$$tag"; \ + git archive \ + --format=tar \ + --prefix="hunspell-tok-$$tag"/ \ + --add-file=./tok.dic \ + HEAD \ + | gzip -9 > "hunspell-tok-$$tag".tar.gz clean: - rm -f tok.bdic + rm -f tok.bdic tok.dic data.json install: tok.aff tok.dic install -d ${DESTDIR}${dictdir} diff --git a/README.adoc b/README.adoc index 8d06ea1..575501a 100644 --- a/README.adoc +++ b/README.adoc @@ -24,6 +24,6 @@ sina kama jo e ni lon ilo sona sina la, sina ken kepeken ni lon ilo `hunspell(1) == nimi lon poki ni li jo e nimi ale tan {linku}[nimi Linku]. poki ni li sin lili e ni -kepeken nimi ma, kepeken nimi toki. +kepeken nimi ma, kepeken nimi toki, kepeken nimi jan pi toki pona. :linku: https://lipu-linku.github.io/ diff --git a/README.en.adoc b/README.en.adoc index ccd4932..e643755 100644 --- a/README.en.adoc +++ b/README.en.adoc @@ -20,6 +20,6 @@ You can install this and use it like any other `hunspell(1)` dictionary. == Words included This dictionary contains all the words from {linku}[nimi Linku], with the addition of common -place names and language names. +place names, language names, and Toki Pona names. 
:linku: https://lipu-linku.github.io/ diff --git a/augment.json b/augment.json new file mode 100644 index 0000000..1037874 --- /dev/null +++ b/augment.json @@ -0,0 +1,650 @@ +{ + "data": { + "a": { + "pos": [ + "particle" + ] + }, + "akesi": { + "pos": [ + "noun" + ] + }, + "ala": { + "pos": [ + "adjective" + ] + }, + "alasa": { + "pos": [ + "verb" + ] + }, + "ale": { + "flags": [ + "ph:ali" + ], + "pos": [ + "adjective", + "noun", + "number" + ] + }, + "ali": { + "pos": [ + "adjective", + "noun", + "number" + ] + }, + "anpa": { + "pos": [ + "adjective" + ] + }, + "ante": { + "pos": [ + "adjective" + ] + }, + "anu": { + "pos": [ + "particle" + ] + }, + "awen": { + "pos": [ + "adjective" + ] + }, + "e": { + "pos": [ + "particle" + ] + }, + "en": { + "pos": [ + "particle" + ] + }, + "esun": { + "pos": [ + "noun" + ] + }, + "ijo": { + "pos": [ + "noun" + ] + }, + "ike": { + "pos": [ + "adjective" + ] + }, + "ilo": { + "pos": [ + "noun" + ] + }, + "insa": { + "pos": [ + "noun" + ] + }, + "jaki": { + "pos": [ + "adjective" + ] + }, + "jan": { + "pos": [ + "noun" + ] + }, + "jelo": { + "pos": [ + "adjective" + ] + }, + "jo": { + "pos": [ + "verb" + ] + }, + "kala": { + "pos": [ + "noun" + ] + }, + "kalama": { + "pos": [ + "verb" + ] + }, + "kama": { + "pos": [ + "adjective", + "preverb" + ] + }, + "kasi": { + "pos": [ + "noun" + ] + }, + "ken": { + "pos": [ + "preverb", + "adjective" + ] + }, + "kepeken": { + "pos": [ + "preposition" + ] + }, + "kili": { + "pos": [ + "noun" + ] + }, + "kin": { + "pos": [ + "particle" + ] + }, + "kiwen": { + "pos": [ + "noun" + ] + }, + "ko": { + "pos": [ + "noun" + ] + }, + "kon": { + "pos": [ + "noun" + ] + }, + "kule": { + "pos": [ + "adjective" + ] + }, + "kulupu": { + "pos": [ + "noun" + ] + }, + "kute": { + "pos": [ + "noun", + "verb" + ] + }, + "la": { + "pos": [ + "particle" + ] + }, + "lape": { + "pos": [ + "adjective" + ] + }, + "laso": { + "pos": [ + "adjective" + ] + }, + "lawa": { + "pos": [ + "adjective", + "verb" + ] + }, + 
"len": { + "pos": [ + "noun" + ] + }, + "lete": { + "pos": [ + "adjective" + ] + }, + "li": { + "pos": [ + "particle" + ] + }, + "lili": { + "pos": [ + "adjective" + ] + }, + "linja": { + "pos": [ + "noun" + ] + }, + "lipu": { + "pos": [ + "noun" + ] + }, + "loje": { + "pos": [ + "adjective" + ] + }, + "lon": { + "pos": [ + "preposition" + ] + }, + "luka": { + "pos": [ + "noun", + "number" + ] + }, + "lukin": { + "pos": [ + "noun", + "verb", + "preverb" + ] + }, + "lupa": { + "pos": [ + "noun" + ] + }, + "ma": { + "pos": [ + "noun" + ] + }, + "mama": { + "pos": [ + "noun" + ] + }, + "mani": { + "pos": [ + "noun" + ] + }, + "meli": { + "pos": [ + "noun" + ] + }, + "mi": { + "pos": [ + "noun" + ] + }, + "mije": { + "pos": [ + "noun" + ] + }, + "moku": { + "pos": [ + "verb" + ] + }, + "moli": { + "pos": [ + "adjective" + ] + }, + "monsi": { + "pos": [ + "noun" + ] + }, + "mu": { + "pos": [ + "particle" + ] + }, + "mun": { + "pos": [ + "noun" + ] + }, + "musi": { + "pos": [ + "adjective" + ] + }, + "mute": { + "pos": [ + "adjective", + "noun" + ] + }, + "namako": { + "pos": [ + "adjective" + ] + }, + "nanpa": { + "pos": [ + "particle", + "noun" + ] + }, + "nasa": { + "pos": [ + "adjective" + ] + }, + "nasin": { + "pos": [ + "noun" + ] + }, + "nena": { + "pos": [ + "noun" + ] + }, + "ni": { + "pos": [ + "adjective" + ] + }, + "nimi": { + "pos": [ + "noun" + ] + }, + "noka": { + "pos": [ + "noun" + ] + }, + "o": { + "pos": [ + "particle" + ] + }, + "oko": { + "pos": [ + "noun", + "verb", + "preverb" + ] + }, + "olin": { + "pos": [ + "verb" + ] + }, + "ona": { + "pos": [ + "noun" + ] + }, + "open": { + "pos": [ + "verb" + ] + }, + "pakala": { + "pos": [ + "adjective" + ] + }, + "pali": { + "pos": [ + "verb" + ] + }, + "palisa": { + "pos": [ + "noun" + ] + }, + "pan": { + "pos": [ + "noun" + ] + }, + "pana": { + "pos": [ + "verb" + ] + }, + "pi": { + "pos": [ + "particle" + ] + }, + "pilin": { + "pos": [ + "noun", + "adjective" + ] + }, + "pimeja": { + "pos": [ + 
"adjective" + ] + }, + "pini": { + "pos": [ + "adjective" + ] + }, + "pipi": { + "pos": [ + "noun" + ] + }, + "poka": { + "pos": [ + "noun" + ] + }, + "poki": { + "pos": [ + "noun" + ] + }, + "pona": { + "pos": [ + "adjective" + ] + }, + "pu": { + "pos": [ + "adjective" + ] + }, + "sama": { + "pos": [ + "adjective", + "preposition" + ] + }, + "seli": { + "pos": [ + "noun" + ] + }, + "selo": { + "pos": [ + "noun" + ] + }, + "seme": { + "pos": [ + "particle" + ] + }, + "sewi": { + "pos": [ + "noun", + "adjective" + ] + }, + "sijelo": { + "pos": [ + "noun" + ] + }, + "sike": { + "pos": [ + "noun", + "adjective" + ] + }, + "sin": { + "pos": [ + "adjective" + ] + }, + "sina": { + "pos": [ + "noun" + ] + }, + "sinpin": { + "pos": [ + "noun" + ] + }, + "sitelen": { + "pos": [ + "noun" + ] + }, + "sona": { + "pos": [ + "verb", + "preverb" + ] + }, + "soweli": { + "pos": [ + "noun" + ] + }, + "suli": { + "pos": [ + "adjective" + ] + }, + "suno": { + "pos": [ + "noun" + ] + }, + "supa": { + "pos": [ + "noun" + ] + }, + "suwi": { + "pos": [ + "adjective" + ] + }, + "tan": { + "pos": [ + "preposition" + ] + }, + "taso": { + "pos": [ + "particle", + "adjective" + ] + }, + "tawa": { + "pos": [ + "preposition", + "adjective" + ] + }, + "telo": { + "pos": [ + "noun" + ] + }, + "tenpo": { + "pos": [ + "noun" + ] + }, + "toki": { + "pos": [ + "verb" + ] + }, + "tomo": { + "pos": [ + "noun" + ] + }, + "tu": { + "pos": [ + "number" + ] + }, + "unpa": { + "pos": [ + "verb" + ] + }, + "uta": { + "pos": [ + "noun" + ] + }, + "utala": { + "pos": [ + "verb" + ] + }, + "walo": { + "pos": [ + "adjective" + ] + }, + "wan": { + "pos": [ + "adjective", + "number" + ] + }, + "waso": { + "pos": [ + "noun" + ] + }, + "wawa": { + "pos": [ + "adjective" + ] + }, + "weka": { + "pos": [ + "adjective" + ] + }, + "wile": { + "pos": [ + "preverb" + ] + } + } +} diff --git a/augment_languages.json b/augment_languages.json new file mode 100644 index 0000000..d6ce70d --- /dev/null +++ 
b/augment_languages.json @@ -0,0 +1,80 @@ +{ + "languages": [ + "Alapi", + "Apikan", + "Awasa", + "Awisi", + "Elena", + "Epanja", + "Esi", + "Esuka", + "Inli", + "Insi", + "Intonesija", + "Inu", + "Ipo", + "Isilan", + "Italija", + "Iwisi", + "Jolupa", + "Kalike", + "Kanse", + "Kantun", + "Kinla", + "Lasina", + "Lomani", + "Losi", + "Lowasi", + "Mosijo", + "Netelan", + "Nijon", + "Nosiki", + "Panla", + "Peson", + "Pokasi", + "Posan", + "Potuke", + "Sameka", + "Seki", + "Sesi", + "Sikipe", + "Sonko", + "Sopisi", + "Sumi", + "Tansi", + "Topisin", + "Tosi", + "Kanse", + "Kepeka", + "Mewika", + "Oselija", + "Piten", + "Sonko", + "Tosi", + "Inli", + "Apiwili", + "Epelanto", + "Inli", + "Inota", + "Intelinwa", + "Ito", + "Kuwenja", + "Latan", + "Losupan", + "Mansi", + "Nawi", + "Olapi", + "Palepelen", + "Pasiki", + "Selen", + "Semisi", + "Sinan", + "Sintalin", + "Anlasi", + "Pisinpo", + "Soleso", + "Soma", + "Tolome", + "Tosulaki" + ] +} diff --git a/augment_names.json b/augment_names.json new file mode 100644 index 0000000..8fc8945 --- /dev/null +++ b/augment_names.json @@ -0,0 +1,14 @@ +{ + "names": [ + "Asi", + "Juli", + "Kali", + "Komi", + "Lope", + "Ne", + "Sa", + "Sonja", + "Tepo", + "Wintu" + ] +} diff --git a/augment_places.json b/augment_places.json new file mode 100644 index 0000000..750dfd2 --- /dev/null +++ b/augment_places.json @@ -0,0 +1,179 @@ +{ + "places": [ + "Amelika", + "Antasika", + "Apika", + "Asija", + "Elopa", + "Osejanija", + "Ankola", + "Eliteja", + "Isijopija", + "Kamelun", + "Kana", + "Kanpija", + "Kapon", + "Kenja", + "Kilipasi", + "Kine", + "Kinejekatolija", + "Kinepisa", + "Komo", + "Konko", + "Kosiwa", + "Lapewija", + "Lesoto", + "Lipija", + "Luwanta", + "Malakasi", + "Malawi", + "Mali", + "Malipe", + "Masu", + "Mosanpi", + "Mowisi", + "Mulitanija", + "Namipija", + "Naselija", + "Nise", + "Penen", + "Posuwana", + "Pukinapaso", + "Sanpija", + "Santapiken", + "Sasali", + "Sate", + "Sawasi", + "Seneka", + "Setapika", + "Sijelalijon", + 
"Sinpapuwe", + "Sipusi", + "Somalija", + "Sutan", + "Tansanija", + "Toko", + "Tunisi", + "Ukanta", + "Alensina", + "Awisi", + "Ekato", + "Kalalinuna", + "Kanata", + "Katemala", + "Kenata", + "Kosalika", + "Kupa", + "Mesiko", + "Mewika", + "Ontula", + "Palakawi", + "Panama", + "Papeto", + "Pasila", + "Pawama", + "Pelu", + "Pemuta", + "Penesuwela", + "Sameka", + "Sile", + "Sinita", + "Tominika", + "Ulukawi", + "Aja", + "Akanisan", + "Anku", + "Ilakija", + "Ilan", + "Intonesija", + "Isale", + "Jamanija", + "Kanpusi", + "Katelo", + "Kuli", + "Kusala", + "Kuwasi", + "Lanka", + "Losi", + "Lunpan", + "Malasija", + "Masu", + "Mijama", + "Nijon", + "Pakisan", + "Palani", + "Palata", + "Panla", + "Pilipina", + "Pilisin", + "Po", + "Sawusi", + "Sonko", + "Sulija", + "Tawi", + "Tuki", + "Uman", + "Utun", + "Wije", + "Alan", + "Antola", + "Elena", + "Epanja", + "Esalasi", + "Esi", + "Esuka", + "Inli", + "Isilan", + "Italija", + "Juke", + "Kalalinuna", + "Kanse", + "Katala", + "Katelo", + "Kinla", + "Kiposi", + "Lawi", + "Lijatuwa", + "Lisensan", + "Lomani", + "Losi", + "Lowasi", + "Lowenki", + "Lowensina", + "Lusepu", + "Maketonija", + "Mosijo", + "Motowa", + "Netelan", + "Nosiki", + "Pelalusi", + "Pesije", + "Peson", + "Pokasi", + "Posan", + "Posuka", + "Potuke", + "Samalino", + "Seki", + "Sipe", + "Sopisi", + "Sukosi", + "Sumi", + "Suwasi", + "Tansi", + "Tosi", + "Tuki", + "Ukawina", + "Wasikano", + "Wensa", + "Intonesija", + "Nusilan", + "Oselija", + "Papuwanijukini", + "Pisi", + "Samowa", + "Tona", + "Tuwalu", + "Wanuwatu" + ] +} diff --git a/augment_transliterations.json b/augment_transliterations.json new file mode 100644 index 0000000..4aa85f4 --- /dev/null +++ b/augment_transliterations.json @@ -0,0 +1,9 @@ +{ + "transliterations": [ + "Siko", + "Jutu", + "Wesi", + "Pesepu", + "Tuwita" + ] +} diff --git a/filter_linku.jq b/filter_linku.jq new file mode 100755 index 0000000..0e7b4f8 --- /dev/null +++ b/filter_linku.jq @@ -0,0 +1,62 @@ +#!/usr/bin/jq -rf + +# Takes a 
combined JSON object (jasima Linku JSON + augment*.json) +# as input, reshapes it, and filters it according to dictionary +# inclusion criteria. + +{ + words: + (.data + # Reshape data to be smaller and more standard. + | map_values( + { + word, + pos, + + # Remaining attributes are used for filtering, or flags + etymology, + def: (.def.en), + + # First recognition percentage is used as it's always the + # latest survey taken. + recognition: (first(.recognition[]? | tonumber) // null), + + book: (if .book == "none" then null else .book end), + commentary: .commentary + } + ) + # Exclude words matching certain criteria before processing further. + | map_values( + # select all... + select( + ( + # - words documented as typos; + ( .etymology // "" | startswith("typo ") ) + + # - words documented as reserved words + or ( .def // "" | test("\\bword reserved\\b") ) + + # - words deprecated by their creators + or ( .commentary // "" | test("\\bdeprecated\\b") ) + + # - words without a book *and* a recognition percentage + # of less than 1/3 of speakers, or no percentage + or ( + (.book == null) + and ( + (.recognition == null) + or ((.recognition // 0) < (1/3)*100) + ) + ) + ) + | not # and then invert the selection + ) + ) + ) + , names: ( + .places + + .languages + + .transliterations + + .names | unique # drop names repeated within or across the lists + ) +} diff --git a/generate_dic.jq b/generate_dic.jq new file mode 100755 index 0000000..c21666e --- /dev/null +++ b/generate_dic.jq @@ -0,0 +1,51 @@ +#!/usr/bin/jq -rf + +# Takes a filtered dictionary as input, and produces a hunspell(5) +# .dic file. + +# Prefix all the data with a word count, as per hunspell(5). +(.words | keys | length)+(.names | length), +(.words + # Do the actual data processing + | map( + .word + + "/kc" # Mark all words as not allowing changes in their case. + + ( + # Forbid suggesting words that are neither in pu nor recognized + # by at least two thirds (~66.7%) of word survey respondents. 
+ if ( + (.book == "pu") + or ( + (.recognition >= ((1/3)*2)*100) + ) + ) then + "" + else + "ns" # joins the "/kc" flag field; a second "/" would itself be parsed as a flag + end + ) + + ( + # If the word object contains an array with the parts of speech, + # include it. If it does not, just silently move on. + # Parts of speech are currently not stored in Linku itself, + # but rather come from our augment.json, until they appear + # in nimi Linku someday. + if (.pos != null) then + (.pos | " po:" + join(" po:")) + else + "" + end + ) + # + ( + # # Add any additional hunspell flags. + # if (.flags != null) then + # (.flags | join(" ")) + # else + # "" + # end + # ) + )[] +) +, (.names + | map(. + "/kc po:name")[] +) diff --git a/tok.aff b/tok.aff index c476059..c943f8d 100644 --- a/tok.aff +++ b/tok.aff @@ -1,8 +1,35 @@ +# SPDX-License-Identifier: CC0-1.0 + +# TODO: This could use improving; namely... +# - words should *always* be lowercase; only proper nouns are uppercase +# - hunspell shouldn't suggest changes to uppercased versions of words +# when it encounters words that begin with uppercase letters. +# it should only offer to try and correct away from invalid syllables. + SET UTF-8 -TRY aeijklmnopstuw -KEY qwertyuiop|asdfghjkl|zxcvbnm +LANG tok + +# Always suggest lowercasing uppercased words; +# improve suggestions when words are missing spaces. +# With, "Kenla" suggests "ken la"; without, suggests "en la". +# Additionally, try to change words according to phonetic guidelines +# +KEY Aa|Ee|Ii|Jj|Kk|Ll|Mm|Nn|Oo|Pp|Ss|Tt|Uu|Ww|BP|bp|DT|dt|GK|gk + +# Make words suffixed with /kc always maintain their casing +KEEPCASE kc + +# Forbid suggesting words that are recognized by fewer than two thirds +# of respondents in Linku's word survey data and that are not in pu. +NOSUGGEST ns +# Forbid all word-compounding. BREAK 0 -KEEPCASE lc +# Always correct away from usages of wuwojiti, the four invalid syllables. 
+REP 4 +REP wu u +REP wo o +REP ji i +REP ti si diff --git a/tok.dic b/tok.dic deleted file mode 100644 index c88f868..0000000 --- a/tok.dic +++ /dev/null @@ -1,125 +0,0 @@ -124 -a/lc po:particle -akesi/lc po:noun -ala/lc po:adjective -alasa/lc po:verb -ale/lc po:adjective po:noun po:number -ali/lc po:adjective po:noun po:number -anpa/lc po:adjective -ante/lc po:adjective -anu/lc po:particle -awen/lc po:adjective -e/lc po:particle -en/lc po:particle -esun/lc po:noun -ijo/lc po:noun -ike/lc po:adjective -ilo/lc po:noun -insa/lc po:noun -jaki/lc po:adjective -jan/lc po:noun -jelo/lc po:adjective -jo/lc po:verb -kala/lc po:noun -kalama/lc po:verb -kama/lc po:adjective po:preverb -kasi/lc po:noun -ken/lc po:preverb po:adjective -kepeken/lc po:preposition -kili/lc po:noun -kin/lc po:particle -kiwen/lc po:noun -ko/lc po:noun -kon/lc po:noun -kule/lc po:adjective -kulupu/lc po:noun -kute/lc po:noun po:verb -la/lc po:particle -lape/lc po:adjective -laso/lc po:adjective -lawa/lc po:adjective po:verb -len/lc po:noun -lete/lc po:adjective -li/lc po:particle -lili/lc po:adjective -linja/lc po:noun -lipu/lc po:noun -loje/lc po:adjective -lon/lc po:preposition -luka/lc po:noun po:number -lukin/lc po:noun po:verb po:preverb -lupa/lc po:noun -ma/lc po:noun -mama/lc po:noun -mani/lc po:noun -meli/lc po:noun -mi/lc po:noun -mije/lc po:noun -moku/lc po:verb -moli/lc po:adjective -monsi/lc po:noun -mu/lc po:particle -mun/lc po:noun -musi/lc po:adjective -mute/lc po:adjective po:noun -namako/lc po:adjective -nanpa/lc po:particle po:noun -nasa/lc po:adjective -nasin/lc po:noun -nena/lc po:noun -ni/lc po:adjective -nimi/lc po:noun -noka/lc po:noun -o/lc po:particle -oko/lc po:noun po:verb po:preverb -olin/lc po:verb -ona/lc po:noun -open/lc po:verb -pakala/lc po:adjective -pali/lc po:verb -palisa/lc po:noun -pan/lc po:noun -pana/lc po:verb -pi/lc po:particle -pilin/lc po:noun po:adjective -pimeja/lc po:adjective -pini/lc po:adjective -pipi/lc po:noun -poka/lc po:noun -poki/lc 
po:noun -pona/lc po:adjective -pu/lc po:adjective -sama/lc po:adjective po:preposition -seli/lc po:noun -selo/lc po:noun -seme/lc po:particle -sewi/lc po:noun po:adjective -sijelo/lc po:noun -sike/lc po:noun po:adjective -sin/lc po:adjective -sina/lc po:noun -sinpin/lc po:noun -sitelen/lc po:noun -sona/lc po:verb po:preverb -soweli/lc po:noun -suli/lc po:adjective -suno/lc po:noun -supa/lc po:noun -suwi/lc po:adjective -tan/lc po:preposition -taso/lc po:particle po:adjective -tawa/lc po:preposition po:adjective -telo/lc po:noun -tenpo/lc po:noun -toki/lc po:verb -tomo/lc po:noun -tu/lc po:number -unpa/lc po:verb -uta/lc po:noun -utala/lc po:verb -walo/lc po:adjective -wan/lc po:adjective po:number -waso/lc po:noun -wawa/lc po:adjective -weka/lc po:adjective -wile/lc po:preverb