From a76effb418aac183f2f7f77b329c975ad1aaecd4 Mon Sep 17 00:00:00 2001
From: Kylie McClain <kylie@somas.is>
Date: Mon, 29 Aug 2022 11:43:48 -0400
Subject: [PATCH] o ante e nasin pali tawa nasin sin

- o kepeken nimi Linku
- o wan e ijo tan nimi Linku e ijo tan pali ni
---
 .gitignore                    |   3 +
 Makefile                      |  37 +-
 README.adoc                   |   2 +-
 README.en.adoc                |   2 +-
 augment.json                  | 650 ++++++++++++++++++++++++++++++++++
 augment_languages.json        |  80 +++++
 augment_names.json            |  14 +
 augment_places.json           | 179 ++++++++++
 augment_transliterations.json |   9 +
 filter_linku.jq               |  62 ++++
 generate_dic.jq               |  51 +++
 tok.aff                       |  33 +-
 tok.dic                       | 125 -------
 13 files changed, 1114 insertions(+), 133 deletions(-)
 create mode 100644 augment.json
 create mode 100644 augment_languages.json
 create mode 100644 augment_names.json
 create mode 100644 augment_places.json
 create mode 100644 augment_transliterations.json
 create mode 100755 filter_linku.jq
 create mode 100755 generate_dic.jq
 delete mode 100644 tok.dic

diff --git a/.gitignore b/.gitignore
index 1c25f3a..faa62df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
+data.json
 tok.bdic
+tok.dic
+hunspell-tok-*.tar.gz
diff --git a/Makefile b/Makefile
index ca852f0..4bcf3cd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,17 +1,48 @@
+.DELETE_ON_ERROR:
+
 prefix ?= /usr/local
 datadir ?= ${prefix}/share
 dictdir ?= ${datadir}/hunspell
 bdicdir ?= ${datadir}/qt/qtwebengine_dictionaries
 
-all:
+linku ?= https://lipu-linku.github.io/jasima/data.json
+
+dev: data.json all  # first rule in the file, so this is what a bare `make` builds
+
+all: tok.dic
+
+data.json: filter_linku.jq augment.json augment_places.json augment_languages.json
+data.json: augment_names.json augment_transliterations.json  # merged by the recipe too
+	curl -Lf ${linku} \
+	    | jq -s '.[0] * .[1] * .[2] * .[3] * .[4] * .[5]' \
+	        - \
+	        ./augment.json \
+	        ./augment_places.json \
+	        ./augment_languages.json \
+	        ./augment_names.json \
+	        ./augment_transliterations.json \
+	    | jq -cf ./filter_linku.jq > data.json
+
+tok.dic: data.json generate_dic.jq
+	jq -rf ./generate_dic.jq < data.json > tok.dic
 
 tok.bdic: tok.aff tok.dic
 	qwebengine_convert_dict tok.aff tok.bdic
 
-bdic: tok.bdic
+dist: clean dev  # tags HEAD as YYYYMMDD (YYYYMMDD.N if N > 1 commits are counted)
+	tag=$$(git rev-list --count --since=yesterday --until=tomorrow HEAD); \
+	[ "$$tag" -gt 1 ] && tag=".$$tag" || tag=; \
+	tag="$$(date +%Y%m%d)$$tag"; \
+	git tag -fs "$$tag"; \
+	git archive \
+	    --format=tar \
+	    --prefix="hunspell-tok-$$tag"/ \
+	    --add-file=./tok.dic \
+	    HEAD \
+	    | gzip -9 > "hunspell-tok-$$tag".tar.gz
 
 clean:
-	rm -f tok.bdic
+	rm -f tok.bdic tok.dic data.json
 
 install: tok.aff tok.dic
 	install -d ${DESTDIR}${dictdir}
diff --git a/README.adoc b/README.adoc
index 8d06ea1..575501a 100644
--- a/README.adoc
+++ b/README.adoc
@@ -24,6 +24,6 @@ sina kama jo e ni lon ilo sona sina la, sina ken kepeken ni lon ilo `hunspell(1)
 == nimi lon
 
 poki ni li jo e nimi ale tan {linku}[nimi Linku]. poki ni li sin lili e ni
-kepeken nimi ma, kepeken nimi toki.
+kepeken nimi ma, kepeken nimi toki, kepeken nimi jan pi toki pona.
 
 :linku: https://lipu-linku.github.io/
diff --git a/README.en.adoc b/README.en.adoc
index ccd4932..e643755 100644
--- a/README.en.adoc
+++ b/README.en.adoc
@@ -20,6 +20,6 @@ You can install this and use it like any other `hunspell(1)` dictionary.
 == Words included
 
 This dictionary contains all the words from {linku}[nimi Linku], with the addition of common
-place names and language names.
+place names, language names, and Toki Pona names.
 
 :linku: https://lipu-linku.github.io/
diff --git a/augment.json b/augment.json
new file mode 100644
index 0000000..1037874
--- /dev/null
+++ b/augment.json
@@ -0,0 +1,650 @@
+{
+  "data": {
+    "a": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "akesi": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ala": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "alasa": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "ale": {
+      "flags": [
+        "ph:ali"
+      ],
+      "pos": [
+        "adjective",
+        "noun",
+        "number"
+      ]
+    },
+    "ali": {
+      "pos": [
+        "adjective",
+        "noun",
+        "number"
+      ]
+    },
+    "anpa": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "ante": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "anu": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "awen": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "e": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "en": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "esun": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ijo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ike": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "ilo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "insa": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "jaki": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "jan": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "jelo": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "jo": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "kala": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "kalama": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "kama": {
+      "pos": [
+        "adjective",
+        "preverb"
+      ]
+    },
+    "kasi": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ken": {
+      "pos": [
+        "preverb",
+        "adjective"
+      ]
+    },
+    "kepeken": {
+      "pos": [
+        "preposition"
+      ]
+    },
+    "kili": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "kin": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "kiwen": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ko": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "kon": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "kule": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "kulupu": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "kute": {
+      "pos": [
+        "noun",
+        "verb"
+      ]
+    },
+    "la": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "lape": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "laso": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "lawa": {
+      "pos": [
+        "adjective",
+        "verb"
+      ]
+    },
+    "len": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "lete": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "li": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "lili": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "linja": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "lipu": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "loje": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "lon": {
+      "pos": [
+        "preposition"
+      ]
+    },
+    "luka": {
+      "pos": [
+        "noun",
+        "number"
+      ]
+    },
+    "lukin": {
+      "pos": [
+        "noun",
+        "verb",
+        "preverb"
+      ]
+    },
+    "lupa": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ma": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "mama": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "mani": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "meli": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "mi": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "mije": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "moku": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "moli": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "monsi": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "mu": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "mun": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "musi": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "mute": {
+      "pos": [
+        "adjective",
+        "noun"
+      ]
+    },
+    "namako": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "nanpa": {
+      "pos": [
+        "particle",
+        "noun"
+      ]
+    },
+    "nasa": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "nasin": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "nena": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "ni": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "nimi": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "noka": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "o": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "oko": {
+      "pos": [
+        "noun",
+        "verb",
+        "preverb"
+      ]
+    },
+    "olin": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "ona": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "open": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "pakala": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "pali": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "palisa": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "pan": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "pana": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "pi": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "pilin": {
+      "pos": [
+        "noun",
+        "adjective"
+      ]
+    },
+    "pimeja": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "pini": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "pipi": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "poka": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "poki": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "pona": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "pu": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "sama": {
+      "pos": [
+        "adjective",
+        "preposition"
+      ]
+    },
+    "seli": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "selo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "seme": {
+      "pos": [
+        "particle"
+      ]
+    },
+    "sewi": {
+      "pos": [
+        "noun",
+        "adjective"
+      ]
+    },
+    "sijelo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "sike": {
+      "pos": [
+        "noun",
+        "adjective"
+      ]
+    },
+    "sin": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "sina": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "sinpin": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "sitelen": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "sona": {
+      "pos": [
+        "verb",
+        "preverb"
+      ]
+    },
+    "soweli": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "suli": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "suno": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "supa": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "suwi": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "tan": {
+      "pos": [
+        "preposition"
+      ]
+    },
+    "taso": {
+      "pos": [
+        "particle",
+        "adjective"
+      ]
+    },
+    "tawa": {
+      "pos": [
+        "preposition",
+        "adjective"
+      ]
+    },
+    "telo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "tenpo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "toki": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "tomo": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "tu": {
+      "pos": [
+        "number"
+      ]
+    },
+    "unpa": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "uta": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "utala": {
+      "pos": [
+        "verb"
+      ]
+    },
+    "walo": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "wan": {
+      "pos": [
+        "adjective",
+        "number"
+      ]
+    },
+    "waso": {
+      "pos": [
+        "noun"
+      ]
+    },
+    "wawa": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "weka": {
+      "pos": [
+        "adjective"
+      ]
+    },
+    "wile": {
+      "pos": [
+        "preverb"
+      ]
+    }
+  }
+}
diff --git a/augment_languages.json b/augment_languages.json
new file mode 100644
index 0000000..d6ce70d
--- /dev/null
+++ b/augment_languages.json
@@ -0,0 +1,80 @@
+{
+  "languages": [
+    "Alapi",
+    "Apikan",
+    "Awasa",
+    "Awisi",
+    "Elena",
+    "Epanja",
+    "Esi",
+    "Esuka",
+    "Inli",
+    "Insi",
+    "Intonesija",
+    "Inu",
+    "Ipo",
+    "Isilan",
+    "Italija",
+    "Iwisi",
+    "Jolupa",
+    "Kalike",
+    "Kanse",
+    "Kantun",
+    "Kinla",
+    "Lasina",
+    "Lomani",
+    "Losi",
+    "Lowasi",
+    "Mosijo",
+    "Netelan",
+    "Nijon",
+    "Nosiki",
+    "Panla",
+    "Peson",
+    "Pokasi",
+    "Posan",
+    "Potuke",
+    "Sameka",
+    "Seki",
+    "Sesi",
+    "Sikipe",
+    "Sonko",
+    "Sopisi",
+    "Sumi",
+    "Tansi",
+    "Topisin",
+    "Tosi",
+    "Kanse",
+    "Kepeka",
+    "Mewika",
+    "Oselija",
+    "Piten",
+    "Sonko",
+    "Tosi",
+    "Inli",
+    "Apiwili",
+    "Epelanto",
+    "Inli",
+    "Inota",
+    "Intelinwa",
+    "Ito",
+    "Kuwenja",
+    "Latan",
+    "Losupan",
+    "Mansi",
+    "Nawi",
+    "Olapi",
+    "Palepelen",
+    "Pasiki",
+    "Selen",
+    "Semisi",
+    "Sinan",
+    "Sintalin",
+    "Anlasi",
+    "Pisinpo",
+    "Soleso",
+    "Soma",
+    "Tolome",
+    "Tosulaki"
+  ]
+}
diff --git a/augment_names.json b/augment_names.json
new file mode 100644
index 0000000..8fc8945
--- /dev/null
+++ b/augment_names.json
@@ -0,0 +1,14 @@
+{
+  "names": [
+    "Asi",
+    "Juli",
+    "Kali",
+    "Komi",
+    "Lope",
+    "Ne",
+    "Sa",
+    "Sonja",
+    "Tepo",
+    "Wintu"
+  ]
+}
diff --git a/augment_places.json b/augment_places.json
new file mode 100644
index 0000000..750dfd2
--- /dev/null
+++ b/augment_places.json
@@ -0,0 +1,179 @@
+{
+  "places": [
+    "Amelika",
+    "Antasika",
+    "Apika",
+    "Asija",
+    "Elopa",
+    "Osejanija",
+    "Ankola",
+    "Eliteja",
+    "Isijopija",
+    "Kamelun",
+    "Kana",
+    "Kanpija",
+    "Kapon",
+    "Kenja",
+    "Kilipasi",
+    "Kine",
+    "Kinejekatolija",
+    "Kinepisa",
+    "Komo",
+    "Konko",
+    "Kosiwa",
+    "Lapewija",
+    "Lesoto",
+    "Lipija",
+    "Luwanta",
+    "Malakasi",
+    "Malawi",
+    "Mali",
+    "Malipe",
+    "Masu",
+    "Mosanpi",
+    "Mowisi",
+    "Mulitanija",
+    "Namipija",
+    "Naselija",
+    "Nise",
+    "Penen",
+    "Posuwana",
+    "Pukinapaso",
+    "Sanpija",
+    "Santapiken",
+    "Sasali",
+    "Sate",
+    "Sawasi",
+    "Seneka",
+    "Setapika",
+    "Sijelalijon",
+    "Sinpapuwe",
+    "Sipusi",
+    "Somalija",
+    "Sutan",
+    "Tansanija",
+    "Toko",
+    "Tunisi",
+    "Ukanta",
+    "Alensina",
+    "Awisi",
+    "Ekato",
+    "Kalalinuna",
+    "Kanata",
+    "Katemala",
+    "Kenata",
+    "Kosalika",
+    "Kupa",
+    "Mesiko",
+    "Mewika",
+    "Ontula",
+    "Palakawi",
+    "Panama",
+    "Papeto",
+    "Pasila",
+    "Pawama",
+    "Pelu",
+    "Pemuta",
+    "Penesuwela",
+    "Sameka",
+    "Sile",
+    "Sinita",
+    "Tominika",
+    "Ulukawi",
+    "Aja",
+    "Akanisan",
+    "Anku",
+    "Ilakija",
+    "Ilan",
+    "Intonesija",
+    "Isale",
+    "Jamanija",
+    "Kanpusi",
+    "Katelo",
+    "Kuli",
+    "Kusala",
+    "Kuwasi",
+    "Lanka",
+    "Losi",
+    "Lunpan",
+    "Malasija",
+    "Masu",
+    "Mijama",
+    "Nijon",
+    "Pakisan",
+    "Palani",
+    "Palata",
+    "Panla",
+    "Pilipina",
+    "Pilisin",
+    "Po",
+    "Sawusi",
+    "Sonko",
+    "Sulija",
+    "Tawi",
+    "Tuki",
+    "Uman",
+    "Utun",
+    "Wije",
+    "Alan",
+    "Antola",
+    "Elena",
+    "Epanja",
+    "Esalasi",
+    "Esi",
+    "Esuka",
+    "Inli",
+    "Isilan",
+    "Italija",
+    "Juke",
+    "Kalalinuna",
+    "Kanse",
+    "Katala",
+    "Katelo",
+    "Kinla",
+    "Kiposi",
+    "Lawi",
+    "Lijatuwa",
+    "Lisensan",
+    "Lomani",
+    "Losi",
+    "Lowasi",
+    "Lowenki",
+    "Lowensina",
+    "Lusepu",
+    "Maketonija",
+    "Mosijo",
+    "Motowa",
+    "Netelan",
+    "Nosiki",
+    "Pelalusi",
+    "Pesije",
+    "Peson",
+    "Pokasi",
+    "Posan",
+    "Posuka",
+    "Potuke",
+    "Samalino",
+    "Seki",
+    "Sipe",
+    "Sopisi",
+    "Sukosi",
+    "Sumi",
+    "Suwasi",
+    "Tansi",
+    "Tosi",
+    "Tuki",
+    "Ukawina",
+    "Wasikano",
+    "Wensa",
+    "Intonesija",
+    "Nusilan",
+    "Oselija",
+    "Papuwanijukini",
+    "Pisi",
+    "Samowa",
+    "Tona",
+    "Tuwalu",
+    "Wanuwatu"
+  ]
+}
diff --git a/augment_transliterations.json b/augment_transliterations.json
new file mode 100644
index 0000000..4aa85f4
--- /dev/null
+++ b/augment_transliterations.json
@@ -0,0 +1,9 @@
+{
+  "transliterations": [
+    "Siko",
+    "Jutu",
+    "Wesi",
+    "Pesepu",
+    "Tuwita"
+  ]
+}
diff --git a/filter_linku.jq b/filter_linku.jq
new file mode 100755
index 0000000..0e7b4f8
--- /dev/null
+++ b/filter_linku.jq
@@ -0,0 +1,62 @@
+#!/usr/bin/jq -rf
+
+# Takes a combined JSON object (jasima Linku JSON + augment*.json)
+# as input, reshapes it, and filters it according to dictionary
+# inclusion criteria.
+
+{
+    words:
+        (.data
+            # Reshape data to be smaller and more standard.
+            | map_values(
+                {
+                    word,
+                    pos,
+
+                    # Remaining attributes are used for filtering, or flags
+                    etymology,
+                    def: (.def.en),
+
+                    # Only the first recognition percentage is kept; Linku is
+                    # assumed to list the latest survey first.
+                    recognition: (first(.recognition[]? | tonumber) // null),
+
+                    book: (if .book == "none" then null else .book end),
+                    commentary: .commentary
+                }
+            )
+            # Exclude words matching certain criteria before processing further.
+            | map_values(
+                # select all...
+                select(
+                    (
+                        # - words documented as typos;
+                        ( .etymology // "" | startswith("typo ") )
+
+                        # - words documented as reserved words
+                        or ( .def // "" | test("\\bword reserved\\b") )
+
+                        # - words deprecated by their creators
+                        or ( .commentary // "" | test("\\bdeprecated\\b") )
+
+                        # - words without a book *and* a recognition percentage
+                        #   of less than 1/3 of speakers, or no percentage
+                        or (
+                            (.book == null)
+                                and (
+                                    (.recognition == null)
+                                        or ((.recognition // 0) < (1/3)*100)
+                                )
+                        )
+                    )
+                    | not # and then invert the selection
+                )
+            )
+        )
+    # Merge every augment_*.json name list. `unique` sorts the result and drops
+    # names repeated within or across the lists (e.g. "Inli", "Sonko").
+    , names: (
+        .places + .languages + .transliterations + .names
+        | unique
+    )
+}
diff --git a/generate_dic.jq b/generate_dic.jq
new file mode 100755
index 0000000..c21666e
--- /dev/null
+++ b/generate_dic.jq
@@ -0,0 +1,51 @@
+#!/usr/bin/jq -rf
+
+# Takes a filtered dictionary as input, and produces a hunspell(5)
+# .dic file.
+
+# Prefix all the data with a word count, as per hunspell(5).
+(.words | keys | length)+(.names | length),
+(.words
+    # Do the actual data processing
+    | map(
+        .word
+        + "/kc" # Mark all words as not allowing changes in their case.
+        + (
+            # Forbid suggesting words neither in pu nor known to >= 2/3 of
+            # survey respondents; "ns" joins "kc" after the entry's single "/".
+            if (
+                (.book == "pu")
+                    or (
+                        (.recognition >= ((1/3)*2)*100)
+                    )
+                ) then
+                ""
+            else
+                "ns"
+            end
+        )
+        + (
+            # If the word object contains an array with the parts of speech,
+            # include it. If it does not, just silently move on.
+            # Parts of speech are currently not stored in Linku itself,
+            # but rather come from our augment.json, awaiting their appearing
+            # in nimi Linku someday.
+            if (.pos != null) then
+                (.pos | " po:" + join(" po:"))
+            else
+                ""
+            end
+        )
+        # + (
+        #     # Add any additional hunspell flags.
+        #     if (.flags != null) then
+        #         (.flags | join(" "))
+        #     else
+        #         ""
+        #     end
+        # )
+    )[]
+)
+, (.names
+    | map(. + "/kc po:name")[]
+)
diff --git a/tok.aff b/tok.aff
index c476059..c943f8d 100644
--- a/tok.aff
+++ b/tok.aff
@@ -1,8 +1,35 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# TODO: This could use improving; namely...
+# - words should *always* be lowercase; only proper nouns are uppercase
+# - hunspell shouldn't suggest changes to uppercased versions of words
+#   when it encounters words that begin with uppercase letters.
+#   it should only offer to try and correct away from invalid syllables.
+
 SET UTF-8
-TRY aeijklmnopstuw
 
-KEY qwertyuiop|asdfghjkl|zxcvbnm
+LANG tok
+
+# Always suggest lowercasing uppercased words;
+# improve suggestions when words are missing spaces.
+# With, "Kenla" suggests "ken la"; without, suggests "en la".
+# Additionally, try to change words according to phonetic guidelines
+# <https://jan-ne.github.io/tp/tpize>
+KEY Aa|Ee|Ii|Jj|Kk|Ll|Mm|Nn|Oo|Pp|Ss|Tt|Uu|Ww|BP|bp|DT|dt|GK|gk
+
+# Make words suffixed with /kc always maintain their casing
+KEEPCASE kc
+
+# Forbid suggesting words that are neither in pu nor recognized by at
+# least two thirds of respondents to Linku's word survey.
+NOSUGGEST ns
 
+# Disable hunspell's default break points, so words are never split at hyphens.
 BREAK 0
 
-KEEPCASE lc
+# Always correct away from usages of wuwojiti, the four invalid syllables.
+OCONV 4
+OCONV wu u
+OCONV wo o
+OCONV ji i
+OCONV ti si
diff --git a/tok.dic b/tok.dic
deleted file mode 100644
index c88f868..0000000
--- a/tok.dic
+++ /dev/null
@@ -1,125 +0,0 @@
-124
-a/lc po:particle
-akesi/lc po:noun
-ala/lc po:adjective
-alasa/lc po:verb
-ale/lc po:adjective po:noun po:number
-ali/lc po:adjective po:noun po:number
-anpa/lc po:adjective
-ante/lc po:adjective
-anu/lc po:particle
-awen/lc po:adjective
-e/lc po:particle
-en/lc po:particle
-esun/lc po:noun
-ijo/lc po:noun
-ike/lc po:adjective
-ilo/lc po:noun
-insa/lc po:noun
-jaki/lc po:adjective
-jan/lc po:noun
-jelo/lc po:adjective
-jo/lc po:verb
-kala/lc po:noun
-kalama/lc po:verb
-kama/lc po:adjective po:preverb
-kasi/lc po:noun
-ken/lc po:preverb po:adjective
-kepeken/lc po:preposition
-kili/lc po:noun
-kin/lc po:particle
-kiwen/lc po:noun
-ko/lc po:noun
-kon/lc po:noun
-kule/lc po:adjective
-kulupu/lc po:noun
-kute/lc po:noun po:verb
-la/lc po:particle
-lape/lc po:adjective
-laso/lc po:adjective
-lawa/lc po:adjective po:verb
-len/lc po:noun
-lete/lc po:adjective
-li/lc po:particle
-lili/lc po:adjective
-linja/lc po:noun
-lipu/lc po:noun
-loje/lc po:adjective
-lon/lc po:preposition
-luka/lc po:noun po:number
-lukin/lc po:noun po:verb po:preverb
-lupa/lc po:noun
-ma/lc po:noun
-mama/lc po:noun
-mani/lc po:noun
-meli/lc po:noun
-mi/lc po:noun
-mije/lc po:noun
-moku/lc po:verb
-moli/lc po:adjective
-monsi/lc po:noun
-mu/lc po:particle
-mun/lc po:noun
-musi/lc po:adjective
-mute/lc po:adjective po:noun
-namako/lc po:adjective
-nanpa/lc po:particle po:noun
-nasa/lc po:adjective
-nasin/lc po:noun
-nena/lc po:noun
-ni/lc po:adjective
-nimi/lc po:noun
-noka/lc po:noun
-o/lc po:particle
-oko/lc po:noun po:verb po:preverb
-olin/lc po:verb
-ona/lc po:noun
-open/lc po:verb
-pakala/lc po:adjective
-pali/lc po:verb
-palisa/lc po:noun
-pan/lc po:noun
-pana/lc po:verb
-pi/lc po:particle
-pilin/lc po:noun po:adjective
-pimeja/lc po:adjective
-pini/lc po:adjective
-pipi/lc po:noun
-poka/lc po:noun
-poki/lc po:noun
-pona/lc po:adjective
-pu/lc po:adjective
-sama/lc po:adjective po:preposition
-seli/lc po:noun
-selo/lc po:noun
-seme/lc po:particle
-sewi/lc po:noun po:adjective
-sijelo/lc po:noun
-sike/lc po:noun po:adjective
-sin/lc po:adjective
-sina/lc po:noun
-sinpin/lc po:noun
-sitelen/lc po:noun
-sona/lc po:verb po:preverb
-soweli/lc po:noun
-suli/lc po:adjective
-suno/lc po:noun
-supa/lc po:noun
-suwi/lc po:adjective
-tan/lc po:preposition
-taso/lc po:particle po:adjective
-tawa/lc po:preposition po:adjective
-telo/lc po:noun
-tenpo/lc po:noun
-toki/lc po:verb
-tomo/lc po:noun
-tu/lc po:number
-unpa/lc po:verb
-uta/lc po:noun
-utala/lc po:verb
-walo/lc po:adjective
-wan/lc po:adjective po:number
-waso/lc po:noun
-wawa/lc po:adjective
-weka/lc po:adjective
-wile/lc po:preverb