Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: script to filter and generate labels (with translations) #333

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions data/categories/filter_categories.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
"""
https://static.openfoodfacts.org/data/taxonomies/categories.full.json

How to run?
> pip install openfoodfacts
> python data/categories/filter_categories.py
"""

import json
from openfoodfacts.taxonomy import get_taxonomy


OFF_TAXONOMY_NAME = "category"
OP_LANGUAGES_FILE = "src/i18n/data/languages.json"
OLD_CATEGORIES_FILE = "src/data/category-tags.json"

PARENT_CATEGORIES_ID = [
"en:vegetables", # 391
"en:fruits", # 287
Expand All @@ -26,12 +38,12 @@


def get_languages():
with open("src/i18n/data/languages.json") as f:
with open(OP_LANGUAGES_FILE) as f:
return json.load(f)


def get_category_taxonomy():
return get_taxonomy("category")
return get_taxonomy(OFF_TAXONOMY_NAME)


def get_taxonomy_node_by_id(taxonomy, node_id):
Expand Down Expand Up @@ -113,7 +125,7 @@ def write_categories_to_files(categories):


def compare_new_categories_with_old_categories():
with open("src/data/category-tags.json") as f:
with open(OLD_CATEGORIES_FILE) as f:
old_categories = json.load(f)
print("old_categories", len(old_categories))

Expand All @@ -140,11 +152,6 @@ def compare_new_categories_with_old_categories():


if __name__ == "__main__":
"""
How-to run ?
> pip install openfoodfacts
> python data/categories/filter_categories.py
"""
# init
CATEGORIES_FULL = get_category_taxonomy()
print("Total number of categories:", len(CATEGORIES_FULL))
Expand Down
126 changes: 126 additions & 0 deletions data/labels/generate_labels_translations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
https://static.openfoodfacts.org/data/taxonomies/labels.full.json

How to run?
> pip install openfoodfacts
> python data/labels/generate_labels_translations.py
"""

import json
from openfoodfacts.taxonomy import get_taxonomy


# Taxonomy identifier handed to get_taxonomy() by get_label_taxonomy().
OFF_TAXONOMY_NAME = "label"
# JSON file loaded by get_languages(); write_labels_to_files() reads a "code" key from each entry.
OP_LANGUAGES_FILE = "src/i18n/data/languages.json"
# Path of the previous labels list (the same file compare_new_labels_with_old_labels() reads).
OLD_LABELS_FILE = "src/data/labels-tags.json"

# Ids of the taxonomy nodes that filter_labels() keeps; every other label is dropped.
KEEP_ONLY = [
"en:organic"
]


def get_languages():
    """Load the supported languages from ``OP_LANGUAGES_FILE``.

    Returns:
        The deserialized JSON content of the languages file, unchanged.

    Raises:
        FileNotFoundError: if the file cannot be found (the path is relative,
            so it is resolved against the current working directory).
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Explicit UTF-8: otherwise open() falls back to the locale's preferred
    # encoding, which mangles or rejects non-ASCII content on platforms that
    # do not default to UTF-8.
    with open(OP_LANGUAGES_FILE, encoding="utf-8") as f:
        return json.load(f)


def get_label_taxonomy():
    """Fetch the taxonomy named by ``OFF_TAXONOMY_NAME`` through ``get_taxonomy``."""
    label_taxonomy = get_taxonomy(OFF_TAXONOMY_NAME)
    return label_taxonomy


def get_taxonomy_node_by_id(taxonomy, node_id):
    """Return the first node of *taxonomy* whose ``id`` equals *node_id*.

    Nodes are visited in the order produced by ``taxonomy.iter_nodes()``.
    ``None`` is returned when no node matches.
    """
    for candidate in taxonomy.iter_nodes():
        if candidate.id == node_id:
            return candidate
    return None


def get_taxonomy_node_list_by_id_list(taxonomy, node_id_list):
    """Resolve each id of *node_id_list* to its taxonomy node.

    Ids are looked up one by one with ``get_taxonomy_node_by_id``; lookups
    that yield a falsy result (such as ``None`` for an unknown id) are left
    out. The returned list follows the order of *node_id_list*.
    """
    resolved = (
        get_taxonomy_node_by_id(taxonomy, wanted_id) for wanted_id in node_id_list
    )
    return [node for node in resolved if node]


def taxonomy_node_list_to_dict_list(node_list, delete_parents=False):
    """Convert taxonomy nodes into plain dicts.

    Each resulting dict starts with an ``"id"`` key holding ``node.id`` and is
    then filled with the entries of ``node.to_dict()``.

    Args:
        node_list: iterable of nodes exposing ``id`` and ``to_dict()``.
        delete_parents: when true, the ``"parents"`` key is removed from every
            dict (a ``KeyError`` is raised if a dict has no such key).

    Returns:
        A list with one dict per node, in input order.
    """

    def as_dict(node):
        entry = dict(id=node.id)
        entry.update(node.to_dict())
        if delete_parents:
            del entry["parents"]
        return entry

    return [as_dict(node) for node in node_list]


def get_taxonomy_node_children_full_list(taxonomy, node_parent):
    """Collect the nodes of *taxonomy* that have *node_parent* in their parents hierarchy.

    A node is selected when ``node.get_parents_hierarchy()`` yields an element
    equal to *node_parent*. Results keep the order of ``taxonomy.iter_nodes()``.
    """

    def descends_from_parent(candidate):
        for ancestor in candidate.get_parents_hierarchy():
            if ancestor == node_parent:
                # The decision rests on the truthiness of the first match.
                return bool(ancestor)
        return False

    return [node for node in taxonomy.iter_nodes() if descends_from_parent(node)]


def filter_labels(taxonomy):
    """Return the nodes of *taxonomy* whose ``id`` is listed in ``KEEP_ONLY``.

    The result keeps the order in which ``taxonomy.iter_nodes()`` yields nodes.
    """
    return [node for node in taxonomy.iter_nodes() if node.id in KEEP_ONLY]


def write_labels_to_files(labels):
    """Write one JSON file of label names per supported language.

    For every language returned by ``get_languages()`` the file
    ``src/data/labels/<code>.json`` is (re)written with a list of
    ``{"id": ..., "name": ...}`` objects sorted by name. A label's name is
    ``label["name"][<code>]`` when that translation exists and the English
    name ``label["name"]["en"]`` otherwise.

    Args:
        labels: list of dicts, each with an ``"id"`` key and a ``"name"`` dict
            keyed by language code. It is iterated once per language.

    Raises:
        KeyError: if a label has neither a translation for the language nor
            an ``"en"`` name, or if a language entry has no ``"code"`` key.
    """
    languages = get_languages()
    for language in languages:
        language_code = language["code"]
        # for each label, get translation (or default to en)
        language_labels = [
            {
                "id": label["id"],
                "name": (
                    label["name"][language_code]
                    if language_code in label["name"]
                    else label["name"]["en"]
                ),
            }
            for label in labels
        ]
        # order by name
        language_labels.sort(key=lambda x: x["name"])
        # write to file; ensure_ascii=False emits raw non-ASCII characters, so
        # the encoding must be pinned to UTF-8 instead of the locale default.
        with open(f"src/data/labels/{language_code}.json", "w", encoding="utf-8") as f:
            json.dump(language_labels, f, ensure_ascii=False)


def compare_new_labels_with_old_labels():
    """Print a comparison between the previous labels and the generated ones.

    Loads the previous list from ``OLD_LABELS_FILE`` and the generated English
    list from ``src/data/labels/en.json``, then prints:

    * the size of each list,
    * the old entries whose id is absent from the new list (count and entries),
    * the number of new entries whose id is absent from the old list.

    Both files are expected to hold a list of objects with an ``"id"`` key;
    entries are matched on that key only.
    """
    with open(OLD_LABELS_FILE, encoding="utf-8") as f:
        old_labels = json.load(f)
    print("old_labels", len(old_labels))

    with open("src/data/labels/en.json", encoding="utf-8") as f:
        new_labels = json.load(f)
    print("new_labels", len(new_labels))

    # Index ids once so each membership test is a set lookup rather than a
    # scan of the other list.
    old_ids = {label["id"] for label in old_labels}
    new_ids = {label["id"] for label in new_labels}

    # check missing in new
    label_missing_in_new_list = [
        label for label in old_labels if label["id"] not in new_ids
    ]
    print("missing in new", len(label_missing_in_new_list))
    print(label_missing_in_new_list)

    # check missing in old: walk the NEW labels and look them up in the old
    # ids (comparing the old list with itself would always report zero).
    label_missing_in_old_list = [
        label for label in new_labels if label["id"] not in old_ids
    ]
    print("missing in old", len(label_missing_in_old_list))


if __name__ == "__main__":
    # Load the complete label taxonomy.
    LABELS_FULL = get_label_taxonomy()
    print("Total number of labels:", len(LABELS_FULL))

    # Keep only the selected labels, as plain dicts without their "parents" entry.
    kept_nodes = filter_labels(LABELS_FULL)
    label_dicts = taxonomy_node_list_to_dict_list(kept_nodes, delete_parents=True)
    print("Labels remaining:", len(label_dicts))

    # Emit one translations file per language.
    write_labels_to_files(label_dicts)
    print("Wrote to language files")

    # Report the differences with the previous labels file.
    compare_new_labels_with_old_labels()
19 changes: 12 additions & 7 deletions data/origins/generate_origins_translations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
"""
https://static.openfoodfacts.org/data/taxonomies/origins.full.json

How to run?
> pip install openfoodfacts
> python data/origins/generate_origins_translations.py
"""

import json
from openfoodfacts.taxonomy import get_taxonomy


OFF_TAXONOMY_NAME = "origin"
OP_LANGUAGES_FILE = "src/i18n/data/languages.json"
OLD_ORIGINS_FILE = "src/data/origins-tags.json"

WITH_PROPERTIES = [
Expand All @@ -25,12 +35,12 @@


def get_languages():
with open("src/i18n/data/languages.json") as f:
with open(OP_LANGUAGES_FILE) as f:
return json.load(f)


def get_origin_taxonomy():
return get_taxonomy("origin")
return get_taxonomy(OFF_TAXONOMY_NAME)


def get_taxonomy_node_by_id(taxonomy, node_id):
Expand Down Expand Up @@ -119,11 +129,6 @@ def compare_new_origins_with_old_origins():


if __name__ == "__main__":
"""
How-to run ?
> pip install openfoodfacts
> python data/origins/generate_origins_translations.py
"""
# init
ORIGINS_FULL = get_origin_taxonomy()
print("Total number of origins:", len(ORIGINS_FULL))
Expand Down
1 change: 1 addition & 0 deletions src/data/labels/aa.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ach.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/af.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ak.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/am.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ar.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/as.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ast.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/az.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/be.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ber.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/bg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Био"}]
1 change: 1 addition & 0 deletions src/data/labels/bm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/bn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/bo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/br.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/bs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ca.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Orgànic"}]
1 change: 1 addition & 0 deletions src/data/labels/ce.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/chr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/co.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/crs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/cs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Bio"}]
1 change: 1 addition & 0 deletions src/data/labels/cv.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/cy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/da.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Økologisk"}]
1 change: 1 addition & 0 deletions src/data/labels/de.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Bio"}]
1 change: 1 addition & 0 deletions src/data/labels/el.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/en.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/en_AU.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/en_GB.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/eo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/es.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Ecológico"}]
1 change: 1 addition & 0 deletions src/data/labels/et.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/eu.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/fa.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/fi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Luomu"}]
1 change: 1 addition & 0 deletions src/data/labels/fil.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/fo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/fr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Bio"}]
1 change: 1 addition & 0 deletions src/data/labels/ga.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/gd.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/gl.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/gu.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ha.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/he.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "אורגני"}]
1 change: 1 addition & 0 deletions src/data/labels/hi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/hr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Ekološki uzgoj"}]
1 change: 1 addition & 0 deletions src/data/labels/ht.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/hu.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Bio"}]
1 change: 1 addition & 0 deletions src/data/labels/hy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/id.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ii.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/is.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/it.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Biologico"}]
1 change: 1 addition & 0 deletions src/data/labels/iu.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
1 change: 1 addition & 0 deletions src/data/labels/ja.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "en:organic", "name": "Organic"}]
Loading
Loading