Skip to content

Commit

Permalink
Move writing system keys to representations parent key in word data
Browse files Browse the repository at this point in the history
  • Loading branch information
gregdan3 committed Dec 1, 2023
1 parent b3b13cd commit 9b78f22
Show file tree
Hide file tree
Showing 260 changed files with 1,602 additions and 1,066 deletions.
78 changes: 48 additions & 30 deletions .scripts/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,44 +70,60 @@ def trash(word: str, data: dict):


WORDS = nested_defaultdict()
REPRESENTATIONS = nested_defaultdict()
DEFINITIONS = nested_defaultdict()

COMMENTARY = nested_defaultdict()
SP_ETYMOLOGY = nested_defaultdict()
ETYMOLOGY = nested_defaultdict()

TRANSFORMER = "t"
DESTINATION = "d"


TRANSFORM_MAP = {
# NOTE: nasin nimi li sama nimi Linku (roughly: the key naming scheme matches Linku's names)
"word": noop,
"sitelen_pona": partial(transform_to_list, splitter=" "),
"ucsur": noop,
"sitelen_pona_etymology": trash, # send to translate
"sitelen_sitelen": noop,
"sitelen_emosi": noop,
# "luka_pona": partial(noop, _return_if_null=dict()),
"luka_pona": trash, # to be replaced with totally different doc
"coined_year": noop,
"coined_era": noop,
"book": partial(noop, _return_if_null="none"),
"usage_category": partial(noop, _return_if_null="obscure"),
"source_language": partial(noop, _return_if_null="unknown"),
"etymology": trash,
"etymology_data": trash, # to transform and send to translate
"ku_data": transform_ku_data,
"recognition": transform_recognition_data,
"see_also": partial(transform_to_list, splitter=","),
"tags": trash,
"author_verbatim": noop,
"author_verbatim_source": noop,
"pu_verbatim": partial(noop, _return_if_null=None),
"commentary": trash, # send to translate
"def": trash,
"word": {TRANSFORMER: noop, DESTINATION: WORDS},
# NOTE: `word` (above) could be in `representations` but we decided against that
"sitelen_pona": {
TRANSFORMER: partial(transform_to_list, splitter=" "),
DESTINATION: REPRESENTATIONS,
},
"ucsur": {TRANSFORMER: noop, DESTINATION: REPRESENTATIONS},
"sitelen_pona_etymology": {TRANSFORMER: trash}, # send to translate
"sitelen_sitelen": {TRANSFORMER: noop, DESTINATION: REPRESENTATIONS},
"sitelen_emosi": {TRANSFORMER: noop, DESTINATION: REPRESENTATIONS},
# "luka_pona": {TRANSFORMER: partial(noop, _return_if_null=dict()), DESTINATION: WORDS},
"luka_pona": {TRANSFORMER: trash}, # to be replaced with totally different doc
"coined_year": {TRANSFORMER: noop, DESTINATION: WORDS},
"coined_era": {TRANSFORMER: noop, DESTINATION: WORDS},
"book": {TRANSFORMER: partial(noop, _return_if_null="none"), DESTINATION: WORDS},
"usage_category": {
TRANSFORMER: partial(noop, _return_if_null="obscure"),
DESTINATION: WORDS,
},
"source_language": {
TRANSFORMER: partial(noop, _return_if_null="unknown"),
DESTINATION: WORDS,
},
"etymology": {TRANSFORMER: trash},
"etymology_data": {TRANSFORMER: trash}, # to transform and send to translate
"ku_data": {TRANSFORMER: transform_ku_data, DESTINATION: WORDS},
"recognition": {TRANSFORMER: transform_recognition_data, DESTINATION: WORDS},
"see_also": {
TRANSFORMER: partial(transform_to_list, splitter=","),
DESTINATION: WORDS,
},
"tags": {TRANSFORMER: trash},
"author_verbatim": {TRANSFORMER: noop, DESTINATION: WORDS},
"author_verbatim_source": {TRANSFORMER: noop, DESTINATION: WORDS},
"pu_verbatim": {
TRANSFORMER: partial(noop, _return_if_null=None),
DESTINATION: WORDS,
},
"commentary": {TRANSFORMER: trash}, # send to translate
"def": {TRANSFORMER: trash}, # translate special case
}

TRANSFORMER = "t"
DESTINATION = "d"

TRANSLATION_MAP = {
"etymology_data": {
TRANSFORMER: transform_etym_data,
Expand Down Expand Up @@ -144,9 +160,10 @@ def main():
for word in data.keys():
for field in TRANSFORM_MAP.keys():
fetched = data[word].get(field)
formatted = TRANSFORM_MAP[field](word, fetched)
formatted = TRANSFORM_MAP[field][TRANSFORMER](word, fetched)
if formatted is not None:
WORDS[word][field] = formatted
write_to = TRANSFORM_MAP[field][DESTINATION]
write_to[word][field] = formatted

# if field == "ucsur":
# codepoint = data[word].get("ucsur")
Expand Down Expand Up @@ -177,6 +194,7 @@ def main():
# TODO: order keys freely instead of alphabetically
# or crowdin will solve this for us
for word, worddata in WORDS.items():
worddata["representations"] = REPRESENTATIONS[word]
with open(f"../words/{word}.toml", "w") as f:
tomlified = tomlkit.dumps(worddata, sort_keys=True)
f.write(tomlified)
Expand Down
10 changes: 6 additions & 4 deletions words/Pingo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "ku lili"
coined_era = "post-pu"
coined_year = "2020"
see_also = []
sitelen_emosi = ""
sitelen_pona = ["Pingo"]
sitelen_sitelen = ""
source_language = "a priori"
ucsur = ""
usage_category = "obscure"
word = "Pingo"

Expand All @@ -21,3 +17,9 @@ car = 6
2021-10 = 20
2022-08 = 5
2023-09 = 4

[representations]
sitelen_emosi = ""
sitelen_pona = ["Pingo"]
sitelen_sitelen = ""
ucsur = ""
10 changes: 6 additions & 4 deletions words/a.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = ["kin"]
sitelen_emosi = ""
sitelen_pona = ["a"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/a.jpg"
source_language = "onomatopoeia"
ucsur = "U+F1900"
usage_category = "core"
word = "a"

Expand Down Expand Up @@ -43,3 +39,9 @@ fr = "PARTICULE (accent, émotion ou confirmation)"
2020-04 = 99
2022-08 = 99
2023-09 = 99

[representations]
sitelen_emosi = ""
sitelen_pona = ["a"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/a.jpg"
ucsur = "U+F1900"
10 changes: 6 additions & 4 deletions words/aka.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "none"
coined_era = "post-ku"
coined_year = "2021"
see_also = ["natu"]
sitelen_emosi = ""
sitelen_pona = ["aka"]
sitelen_sitelen = ""
source_language = "multiple"
ucsur = ""
usage_category = "obscure"
word = "aka"

Expand All @@ -22,3 +18,9 @@ word = "across"
[recognition]
2022-08 = 0
2023-09 = 1

[representations]
sitelen_emosi = ""
sitelen_pona = ["aka"]
sitelen_sitelen = ""
ucsur = ""
10 changes: 6 additions & 4 deletions words/akesi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = []
sitelen_emosi = "🦎"
sitelen_pona = ["akesi", "akesi2"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/akesi.jpg"
source_language = "Dutch"
ucsur = "U+F1901"
usage_category = "core"
word = "akesi"

Expand All @@ -28,3 +24,9 @@ fr = "NOM animal non-mignon ; reptile, amphibien"
[recognition]
2022-08 = 98
2023-09 = 99

[representations]
sitelen_emosi = "🦎"
sitelen_pona = ["akesi", "akesi2"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/akesi.jpg"
ucsur = "U+F1901"
10 changes: 6 additions & 4 deletions words/ako.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ coined_era = ""
coined_year = ""
etymology = []
see_also = ["a"]
sitelen_emosi = ""
sitelen_pona = ["ako"]
sitelen_sitelen = ""
source_language = "unknown"
ucsur = ""
usage_category = "obscure"
word = "ako"

[recognition]
2022-08 = 0
2023-09 = 0

[representations]
sitelen_emosi = ""
sitelen_pona = ["ako"]
sitelen_sitelen = ""
ucsur = ""
10 changes: 6 additions & 4 deletions words/ala.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = []
sitelen_emosi = ""
sitelen_pona = ["ala"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/ala.jpg"
source_language = "Georgian"
ucsur = "U+F1902"
usage_category = "core"
word = "ala"

Expand All @@ -35,3 +31,9 @@ fr = "ADJECTIF non, ne, pas, aucun, zéro"
[recognition]
2022-08 = 100
2023-09 = 100

[representations]
sitelen_emosi = ""
sitelen_pona = ["ala"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/ala.jpg"
ucsur = "U+F1902"
10 changes: 6 additions & 4 deletions words/alasa.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = ["lukin"]
sitelen_emosi = "🏹"
sitelen_pona = ["alasa"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/alasa.jpg"
source_language = "Acadian French"
ucsur = "U+F1903"
usage_category = "core"
word = "alasa"

Expand Down Expand Up @@ -42,3 +38,9 @@ fr = "VERBE chasser, cueillir"
2020-04 = 97
2022-08 = 97
2023-09 = 99

[representations]
sitelen_emosi = "🏹"
sitelen_pona = ["alasa"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/alasa.jpg"
ucsur = "U+F1903"
10 changes: 6 additions & 4 deletions words/ale.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = ["ali"]
sitelen_emosi = "♾️"
sitelen_pona = ["ale"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/ale.jpg"
source_language = "Dutch"
ucsur = "U+F1904"
usage_category = "core"
word = "ale"

Expand Down Expand Up @@ -49,3 +45,9 @@ fr = "ADJECTIF tous, abondants, innombrables\nNOM tout, abondance, la vie, l'uni
[recognition]
2022-08 = 92
2023-09 = 90

[representations]
sitelen_emosi = "♾️"
sitelen_pona = ["ale"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/ale.jpg"
ucsur = "U+F1904"
10 changes: 6 additions & 4 deletions words/alente.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "none"
coined_era = "post-ku"
coined_year = "2022"
see_also = []
sitelen_emosi = ""
sitelen_pona = ["alente"]
sitelen_sitelen = ""
source_language = "toki pona"
ucsur = ""
usage_category = "obscure"
word = "alente"

Expand All @@ -18,3 +14,9 @@ word = "ale ante"
[recognition]
2022-08 = 2
2023-09 = 2

[representations]
sitelen_emosi = ""
sitelen_pona = ["alente"]
sitelen_sitelen = ""
ucsur = ""
10 changes: 6 additions & 4 deletions words/ali.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = ["ale"]
sitelen_emosi = "♾️"
sitelen_pona = ["ali"]
sitelen_sitelen = ""
source_language = "Dutch"
ucsur = ""
usage_category = "uncommon"
word = "ali"

Expand All @@ -23,3 +19,9 @@ universe = 21
[recognition]
2022-08 = 35
2023-09 = 32

[representations]
sitelen_emosi = "♾️"
sitelen_pona = ["ali"]
sitelen_sitelen = ""
ucsur = ""
10 changes: 6 additions & 4 deletions words/alu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "none"
coined_era = "post-pu"
coined_year = "2018"
see_also = ["la"]
sitelen_emosi = ""
sitelen_pona = ["alu"]
sitelen_sitelen = ""
source_language = "toki pona"
ucsur = ""
usage_category = "obscure"
word = "alu"

Expand All @@ -20,3 +16,9 @@ word = "la"
2021-10 = 24
2022-08 = 3
2023-09 = 3

[representations]
sitelen_emosi = ""
sitelen_pona = ["alu"]
sitelen_sitelen = ""
ucsur = ""
10 changes: 6 additions & 4 deletions words/anpa.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = ["noka"]
sitelen_emosi = "⬇️"
sitelen_pona = ["anpa"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/anpa.jpg"
source_language = "Acadian French"
ucsur = "U+F1905"
usage_category = "core"
word = "anpa"

Expand All @@ -35,3 +31,9 @@ fr = "ADJECTIF incliné vers le bas, humble, qui dépend"
[recognition]
2022-08 = 99
2023-09 = 99

[representations]
sitelen_emosi = "⬇️"
sitelen_pona = ["anpa"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/anpa.jpg"
ucsur = "U+F1905"
10 changes: 6 additions & 4 deletions words/ante.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ book = "pu"
coined_era = "pre-pu"
coined_year = ""
see_also = ["sama"]
sitelen_emosi = "🔀"
sitelen_pona = ["ante"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/ante.jpg"
source_language = "Dutch"
ucsur = "U+F1906"
usage_category = "core"
word = "ante"

Expand Down Expand Up @@ -63,3 +59,9 @@ fr = "ADJECTIF différent, changé, autre"
[recognition]
2022-08 = 99
2023-09 = 99

[representations]
sitelen_emosi = "🔀"
sitelen_pona = ["ante"]
sitelen_sitelen = "https://raw.githubusercontent.com/lipu-linku/ijo/main/sitelensitelen/jonathangabel/ante.jpg"
ucsur = "U+F1906"
Loading

0 comments on commit 9b78f22

Please sign in to comment.