Skip to content

Commit

Permalink
1603_1.py (#29): more metadata on statuses files
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Apr 17, 2022
1 parent e471c1b commit 41ce0f9
Show file tree
Hide file tree
Showing 6 changed files with 277 additions and 130 deletions.
2 changes: 1 addition & 1 deletion officinam/1603/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
- https://en.wiktionary.org/wiki/ontology#English
- https://archive.org/details/bub_gb_rM5gdGMu-rAC

[![Ogdoas Scholastica Jacob Lorhard](https://upload.wikimedia.org/wikipedia/commons/9/9d/Ogdoas_Scholastica_Jacob_Lorhard.jpg)](https://archive.org/details/bub_gb_rM5gdGMu-rAC)
[![Ogdoas Scholastica Jacob Lorhard](https://upload.wikimedia.org/wikipedia/commons/9/9d/Ogdoas_Scholastica_Jacob_Lorhard.jpg)](https://archive.org/details/bub_gb_rM5gdGMu-rAC)
53 changes: 52 additions & 1 deletion officinam/999999999/0/1603_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
# from datetime import datetime


import json
from zlib import crc32


Expand Down Expand Up @@ -2189,6 +2190,9 @@ def methodi_ex_codice(self) -> list:
]
scope_and_content = self.quod_res('0_1603_1_7_2616_7535')
# paginae.append(str(scope_and_content))

# @TODO: use self.quod_res_methodi_ex_dictionariorum_corde() instead
# of this block
if scope_and_content and \
qhxl(scope_and_content, meta_langs) is not None:
term = qhxl(scope_and_content, meta_langs)
Expand Down Expand Up @@ -2354,6 +2358,32 @@ def quod_res(self, codicem: str) -> dict:

return None

def quod_res_caveat_lector(self) -> str:
    """quod rēs caveat lēctor

    Fetch the "caveat lector" ("reader beware") note of this codex:
    resolve concept 0_1603_1_7_2617_9289584, read its machine-readable
    source field, and return the translated term.

    Returns:
        str: the translated note, or None when the concept or the
             source field is missing.
    """
    meta_langs = [
        '#item+rem+i_qcc+is_zxxx+ix_codexfacto'
    ]
    caveat_lector = self.quod_res('0_1603_1_7_2617_9289584')
    # Guard first, then resolve the field once; the previous version
    # performed the same qhxl() lookup twice (once in the condition,
    # once for the assignment).
    if caveat_lector:
        term = qhxl(caveat_lector, meta_langs)
        if term is not None:
            # notitiae.translatio presumably localizes the raw term;
            # semantics defined elsewhere in this codebase.
            return self.notitiae.translatio(term)

    return None

def quod_res_methodi_ex_dictionariorum_corde(self) -> str:
    """quod rēs methodī ex dictiōnāriōrum corde

    Fetch the "scope and content" note (methods from the core of the
    dictionaries): resolve concept 0_1603_1_7_2616_7535, read its
    machine-readable source field, and return the translated term.

    Returns:
        str: the translated note, or None when the concept or the
             source field is missing.
    """
    meta_langs = [
        '#item+rem+i_qcc+is_zxxx+ix_codexfacto'
    ]
    scope_and_content = self.quod_res('0_1603_1_7_2616_7535')
    # Guard first, then resolve the field once; the previous version
    # performed the same qhxl() lookup twice (once in the condition,
    # once for the assignment).
    if scope_and_content:
        term = qhxl(scope_and_content, meta_langs)
        if term is not None:
            # notitiae.translatio presumably localizes the raw term;
            # semantics defined elsewhere in this codebase.
            return self.notitiae.translatio(term)

    return None

def res_explanationibus(
self, res: dict, picturae: List[Type['CodexAnnexo']] = None) -> list:
"""rēs explānātiōnibus
Expand Down Expand Up @@ -2992,7 +3022,11 @@ def initiari(self):
# raise ValueError(str(self.linguae))

def crc(self, res: Union[set, list]) -> int:
    """Compute a CRC32 fingerprint of a collection for change detection.

    Args:
        res: values to fingerprint; a set or a list. List order is
             preserved (it is considered meaningful); sets are sorted
             first, because set iteration order is undefined and varies
             between interpreter runs (hash randomization), which would
             make the checksum unstable for identical content.

    Returns:
        int: CRC32 of the JSON serialization of res.
    """
    if isinstance(res, set):
        # key=str keeps sorting total even for mixed element types.
        res = sorted(res, key=str)
    json_text = json.dumps(res)
    return crc32(json_text.encode())

def ex_codice(self):
nomen = self.codex.m1603_1_1__de_codex['#item+rem+i_mul+is_zyyy']
Expand All @@ -3004,9 +3038,22 @@ def ex_codice(self):
# tempus_opus = datetime.datetime.now()
tempus_opus = datetime.datetime.now().replace(microsecond=0)

methodi_ex_dictionariorum_corde = \
self.codex.quod_res_methodi_ex_dictionariorum_corde()
caveat_lector = \
self.codex.quod_res_caveat_lector()

resultatum = {
'annotationes_internalibus': self.codex.n1603ia,
'meta': {
# Caveat lector
'caveat_lector': {
'mul-Zyyy': caveat_lector
},
# Methodī ex dictiōnāriōrum corde
'methodi_ex_dictionariorum_corde': {
'mul-Zyyy': methodi_ex_dictionariorum_corde
},
'nomen': nomen
},
'cdn': self.cdn,
Expand All @@ -3015,11 +3062,13 @@ def ex_codice(self):
'concepta': None,
'res_lingualibus': self.crc(self.codex.usus_linguae),
'res_interlingualibus': self.crc(self.codex.usus_ix_qcc),
'res_picturae': None,
},
'summa': {
'concepta': summis_concepta,
'res_lingualibus': usus_linguae,
'res_interlingualibus': usus_ix_qcc,
'res_picturae': None,
},
'tempus': {
'opus': tempus_opus.isoformat()
Expand Down Expand Up @@ -3100,6 +3149,7 @@ def imprimere_in_markdown(self):
status['status_quo']['summa']['codex']))
paginae.append(' - concepta_non_unicum: {0}'.format(
status['status_quo']['summa']['concepta_non_unicum']))
paginae.append('')
for codex, item in status['librarium'].items():
paginae.append('## {0} {1}'.format(codex, item['meta']['nomen']))
paginae.append('- status_quo')
Expand All @@ -3111,6 +3161,7 @@ def imprimere_in_markdown(self):
paginae.append(
' - res_lingualibus: {0}'.format(
item['status_quo']['summa']['res_lingualibus']))
paginae.append('')

# return [yaml.dump(
# status, allow_unicode=True)]
Expand Down
30 changes: 27 additions & 3 deletions officinam/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -918,14 +918,16 @@ neo_codex_de_numerordinatio_pdf() {
# Arguments:
# numerordinatio
# est_temporarium_fontem (default "1", from 99999/)
# est_temporarium_objectivumm (dfault "0", from real namespace)
# est_temporarium_objectivum (default "0", from real namespace)
# always_stale (default "0", if 1, don't run stale_archive check)
# Outputs:
# Convert files
#######################################
file_translate_csv_de_numerordinatio_q() {
numerordinatio="$1"
est_temporarium_fontem="${2:-"1"}"
est_temporarium_objectivum="${3:-"0"}"
always_stale="${4:-"0"}"

_path=$(numerordinatio_neo_separatum "$numerordinatio" "/")
_nomen=$(numerordinatio_neo_separatum "$numerordinatio" "_")
Expand Down Expand Up @@ -963,7 +965,12 @@ file_translate_csv_de_numerordinatio_q() {

# echo "${FUNCNAME[0]} sources changed_recently. Reloading..."

if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi
if [ -z "$(stale_archive "$objectivum_archivum")" ]; then
if [ "$always_stale" != '1' ]; then
return 0;
fi
echo "Cache may exist, but always_stale enabled [$numerordinatio]"
fi

echo "[ DOWNLOAD Wikidata ] ${FUNCNAME[0]} stale data on [$objectivum_archivum], refreshing..."

Expand Down Expand Up @@ -2149,7 +2156,11 @@ actiones_completis_publicis() {
# @TODO: implement the download
# file_download_if_necessary "$DATA_1603_45_31" "1603_45_31" "csv" "tm.hxl.csv" "hxltmcli" "1"
file_convert_numerordinatio_de_hxltm "$numerordinatio" "1" "0"
file_translate_csv_de_numerordinatio_q "$numerordinatio" "0" "0"

# @TODO: implement decent check if need download Wikidata Q again
# now is hardcoded as "1" on last parameter
# file_translate_csv_de_numerordinatio_q "$numerordinatio" "0" "0"
file_translate_csv_de_numerordinatio_q "$numerordinatio" "0" "0" "1"
file_merge_numerordinatio_de_wiki_q "$numerordinatio" "0" "0"
file_convert_tmx_de_numerordinatio11 "$numerordinatio"
file_convert_tbx_de_numerordinatio11 "$numerordinatio"
Expand All @@ -2161,3 +2172,16 @@ actiones_completis_publicis() {
upload_cdn "$numerordinatio"
}

#######################################
# Regenerate the top-level README.md with the current status (as
# Markdown) of codex 1603_1_1, taking data from the "cdn" librario.
#
# Globals:
#   ROOTDIR (assumed to point at the officinam/ directory)
# Arguments:
#   (none)
# Outputs:
#   Overwrites ${ROOTDIR}/README.md
#######################################
deploy_0_9_markdown() {
    # ROOTDIR assumed to be on officinam
    objectivum_archivum="${ROOTDIR}/README.md"
    # Progress message goes to the caller's stdout; only the Python
    # command below is redirected into the README.
    echo "${FUNCNAME[0]} [$objectivum_archivum]..."

    "${ROOTDIR}/999999999/0/1603_1.py" \
        --codex-de 1603_1_1 \
        --status-quo \
        --ex-librario="cdn" \
        --status-in-markdown \
        > "$objectivum_archivum"
}
3 changes: 2 additions & 1 deletion officinam/999999999/999999_17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ file_convert_numerordinatio_de_hxltm "1603_1_51" "1" "0"
# temp_save_status "1603_45_31"
# actiones_completis_publicis "1603_45_31"
# actiones_completis_publicis "1603_63_101"
opus_temporibus_cdn
# opus_temporibus_cdn
deploy_0_9_markdown

# file_download_if_necessary "$DATA_1603_23_36" "1603_23_36" "csv" "tm.hxl.csv" "hxltmcli" "1"
# file_convert_numerordinatio_de_hxltm "1603_23_36" "1" "0"
Expand Down
194 changes: 70 additions & 124 deletions officinam/README.md
Original file line number Diff line number Diff line change
@@ -1,125 +1,71 @@
# Numerordĭnātĭo pre-compiled data tables and automation scripts
# 1603 Librārium
- status_quo
- summa
- codex: 11
- concepta_non_unicum: 1064
## 1603_25_1 //dictiōnāria de partes corporis humani//
- status_quo
- concepta: 10
- res_interlingualibus: 14
- res_lingualibus: 148

## 1603_44_101 /Dictiōnāria basibus dē sceleribus contrā persōnam/
- status_quo
- concepta: 22
- res_interlingualibus: 14
- res_lingualibus: 162

## 1603_44_111 /Dictiōnāria basibus dē sceleribus contrā ūnam commūnitātem ab rēctōribus/
- status_quo
- concepta: 36
- res_interlingualibus: 16
- res_lingualibus: 226

## 1603_45_1 //dictiōnāria de hūmānitātēs interimperia//
- status_quo
- concepta: 347
- res_interlingualibus: 15
- res_lingualibus: 245

## 1603_45_19 Dictiōnāria dē locī generibus
- status_quo
- concepta: 67
- res_interlingualibus: 12
- res_lingualibus: 241

## 1603_45_31 Dictiōnāria de calamitātibus
- status_quo
- concepta: 25
- res_interlingualibus: 14
- res_lingualibus: 211

## 1603_63_101 //Dictiōnāria basibus dē rēbus vītālibus necessāriīs//
- status_quo
- concepta: 332
- res_interlingualibus: 14
- res_lingualibus: 249

## 1603_64_41 //Dictiōnāria basibus dē perītiae per susurrōs vītālēs//
- status_quo
- concepta: 56
- res_interlingualibus: 12
- res_lingualibus: 150

## 1603_64_604 //Dictiōnāria de aquīs et sānātiōnibus ex līmpidīs//
- status_quo
- concepta: 28
- res_interlingualibus: 12
- res_lingualibus: 227

## 1603_84_1 /Dictiōnāria dentālium/
- status_quo
- concepta: 60
- res_interlingualibus: 12
- res_lingualibus: 4

## 1603_1_1 //Numeroclatura Generālium ad MM ex Numerordĭnātĭo//@lat-Latn
- status_quo
- concepta: 81
- res_interlingualibus: 15
- res_lingualibus: 9

- [1603/1/1/1603_1_1.no1.tm.hxl.csv](1603/1/1/1603_1_1.no1.tm.hxl.csv)
- [HXL-CPLP-Vocab_Auxilium-Humanitarium-API/1603_1_1.tm.hxl](https://docs.google.com/spreadsheets/d/1ih3ouvx_n8W5ntNcYBqoyZ2NRMdaA0LRg5F9mGriZm4/edit#gid=2095477004)
- https://github.com/EticaAI/n-data/tree/main/1603

<!--
> **Note: most (but not all) of this overview is being converted to machine processable format on [1603/1/1/1603_1_1.no1.tm.hxl.csv](1603/1/1/1603_1_1.no1.tm.hxl.csv)** (online version at [HXL-CPLP-Vocab_Auxilium-Humanitarium-API/1603_1_1.tm.hxl](https://docs.google.com/spreadsheets/d/1ih3ouvx_n8W5ntNcYBqoyZ2NRMdaA0LRg5F9mGriZm4/edit#gid=2095477004)). Some notable changes:
> - Base namespaces, instead of "group codes released by an organization" are renamed by suggested intent.
> - _TODO: add more explanations here_
When referring to concepts, we use as exchange keys numeric codes with explicit self taxonomy.
This decision both allows neutrality when working with multiple cultures, and already will feel familiar for people who have [administrative divisions](https://en.wikipedia.org/wiki/Administrative_division) where the baseline can change depending on who is considered a reference.
In general each group of concepts (for example, codes used in a country to define subdivisions) will not add a prefix for the entire country itself (because everything there _is_ from that country). But at international level, this is relevant, and both `1603:45:49` and `1603:45:16` are the best reference on this subject. Other areas are less complex, but we still use nested nomenclature.
While decisions on how to organize, as of 2021-01, are still open, **such numeric taxonomy can persist more long term**. In the worst case, it is also easier to create direct aliases when working with software.
> Practical example:
> - We opt for _`1603:45:49` /UN m49/_ instead of _`1603:47:3166:1` (the neutral number to reference ISO 3166)_ as the default key to reference one type of concept group.
> - We inject information, in special translations, from namespaces such as _`1603:3:12` /Wikidata/_ on `1603:45:49`.
> - We make sure that the compiled tables in special vocabularies (the translations) have references from where the data was from, so implementers can decide use or not. However the biggest relevance of this is for languages without official translations (even if they do exist, only are not pre-compiled).
## [`1603`] /Base prefix/
- [1603/](1603/)
The multilingual-lexicography-automation project will use `1603` as the main namespace to reference other references.
### [`1603:1`] /Metadata overview of 1603 namespaces/
- [1603/1](1603/1)
### [`1603:3`] /Wikimedia Foundation, Inc/
- [1603/3/](1603/3/)
#### [`1603:3:12`] /Wikidata/
### [`1603:13`] /HXL/
- [1603/13/](1603/13/)
### [`1603:45`] /UN/
- [1603/45/](1603/45/)
### [`1603:45:49`] /UN m49/
- [1603/45/49/](1603/45/49/)
### [`1603:45:16`] /Place codes (by UN m49 numeric)/
- [1603/45/16/](1603/45/16/)
> Note: the main interest in multilingual-lexicography-automation is **linguistic content** and how to conciliate data via existing coding systems both for ourselves and third parties **interested in improving multilingualism**. Except for potential data to allow disambiguation which is not heavyweight (such as centroid coordinates) we do not plan to re-publish administrative boundaries.
### [`1603:47`] /ISO/
- [1603/47/](1603/47/)
#### [`1603:47:15924`] /ISO 15924, Codes for the representation of names of scripts/
- [1603/47/15924/](1603/47/15924/)
### [`1603:87`] /Unicode/
- [1603/87/](1603/87/)
### [`1603:994`] /dentāle vocābulāriō/
- [1603/994/](1603/994/)
### [`1603:2600`] /Generic - Multiplication tables/
- [1603/2600/](1603/2600/)
<details>
<summary>Click to see additional internal data files</summary>
## [`999999`] /namespace for intermediate cached files/
- [999999/](999999/)
## [`999999999`] /namespace for automation scripts/
- [999999999/](999999999/)
</details>
## [`1603:*`] Numerordĭnātĭo stability (global level)
> Note: this section assumes data tables published by @EticaAI / @HXL-CPLP **and** global level reference between concepts. Every concept-group have both global numerordĭnātĭo and a local identificator recommended to be used when transposing.
### Stable namespaces
- `1603`
- the base namespace `1603` will be used by @EticaAI / @HXL-CPLP as a tradeoff between something unique while not overlong.
- `1603:45:49` /Standard country or area codes for statistical use (M49)/
- Comments:
- Any local identifier MUST be aligned with the reference organization.
- This namespace can have from 0 to 999 items. No value out of this range is allowed. If used to exchange data with tools that expect UN m49, pre-prend leading zeros to fill 3 characters. However, under Numerordĭnātĭo scheme they are not necessary.
- `900..999` range may have additional semantics when used under `1603:45:49`
Some will be granted to never change.
- External guides:
- https://unstats.un.org/unsd/publication/SeriesM/Series_M49_(1970)_en-fr.pdf
- https://unstats.un.org/unsd/methodology/m49/
- https://undocs.org/pdf?symbol=en/ST/CS/SER.F/347/Rev.1
- https://en.wikipedia.org/wiki/UN_M49
### Likely near stable namespaces
- `1603:45:16` /Place codes/
- Comments:
- This namespace, as its first level, reuses `1603:45:49` with the exception of range `900..999`.
- `1603:3:12`:
- TODO: document Q and P and make it stable interface.
### Intentionally non-stable namespaces
- [`1603:999999`]: namespace for local cache of data files.
-->

## Disclaimers

**Individuals who are direct and indirect contributors of this project are not affiliated with external organizations. The use of labeled numerical namespaces (needed to make things easier for implementers) explicitly does not mean endorsement of the organizations or of their internal groups deciding the coding systems.**

Ad-hoc collaboration (such as bug fixes or suggestions to improve interoperability) between @EticaAI / @HXL-CPLP and individuals which work on any specific namespace cannot be considered formal endorsement of their organization.

Even reuse of work (in special pre-compiled translations, or tested workflows on how to re-generate then from external collaborators) cannot be assumed as endorsement by the work on this monorepo and final work do not need to be public domain as the translations. Such feature can also be called [data roundtripping](https://diff.wikimedia.org/2019/12/13/data-roundtripping-a-new-frontier-for-glam-wiki-collaborations/) and can be stimulated on call to actions such as [Wikiprojecs](https://m.wikidata.org/wiki/Wikidata:WikiProjects) or ad hoc initiatives such [TICO-19](https://tico-19.github.io/).

Please note that even successful projects such as GLAM (see [Wikimedia Commons Data Roundtripping Final Report](https://upload.wikimedia.org/wikipedia/commons/e/e8/Wikimedia_Commons_Data_Roundtripping_-_Final_report.pdf)) in addition to lack of more software and workflows, can have issues such as duplication of data import/export because of lack of consistent IDs. So as part of multilingual lexicography, for sake of re usability, we need to give something and already draft how others could do it. A lot of inspiration for this is [strategies used on scientific names](https://en.wikipedia.org/wiki/Scientific_name) (except that you don't need to know Latin grammar).
Loading

0 comments on commit 41ce0f9

Please sign in to comment.