From 3341d0ad7baa79ff59cc7ffa676d70c1fc9a002f Mon Sep 17 00:00:00 2001 From: Paul Leclercq Date: Fri, 6 Sep 2024 15:22:35 +0200 Subject: [PATCH] review: keywords category (industrie/general) (#231) --- .gitignore | 1 + poetry.lock | 27 ++++++++++++++++++- pyproject.toml | 1 + .../data_processing/mediatree/api_import.py | 2 +- .../mediatree/keyword/keyword.py | 22 ++++----------- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index d8c0e71d..938d8e3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ secrets/pwd_api.txt secrets/username_api.txt +documents-experts/ cc-bio.json *.xlsx coverage_re diff --git a/poetry.lock b/poetry.lock index cf9f629d..3b38e1d0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -905,6 +905,17 @@ https = ["urllib3 (>=1.24.1)"] paramiko = ["paramiko"] pgp = ["gpg"] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "fastjsonschema" version = "2.20.0" @@ -1949,6 +1960,20 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "24.1" @@ -3959,4 +3984,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11.0,<3.13.0" -content-hash = "6bb12e1e78d6b683ad8922a451263d1653031c7d6140cd49d45d5c869c355292" +content-hash = "3ba5d5869d6550cb84515dbaa06e483050984031215811169a598f246334774e" diff --git a/pyproject.toml b/pyproject.toml index e31a3415..90e7a2a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ tenacity = "^8.2.3" sentry-sdk = "^2.13.0" modin = {extras = ["ray"], version = "^0.31.0"} numpy = "1.26.4" +openpyxl = "^3.1.5" [build-system] requires = ["poetry-core>=1.1"] build-backend = "poetry.core.masonry.api" diff --git a/quotaclimat/data_processing/mediatree/api_import.py b/quotaclimat/data_processing/mediatree/api_import.py index 55241def..6334c26a 100644 --- a/quotaclimat/data_processing/mediatree/api_import.py +++ b/quotaclimat/data_processing/mediatree/api_import.py @@ -230,7 +230,7 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c logging.getLogger("modin.logging.default").setLevel(logging.WARNING) if(total_results > 0): logging.info(f"{total_results} 'total_results' field") - new_df : pd.DataFrame = json_normalize(response_sub.get('data')) + new_df : pd.DataFrame = json_normalize(response_sub.get('data')) # TODO UserWarning: json_normalize is not currently supported by PandasOnRay, defaulting to pandas implementation. logging.debug("Schema from API before formatting :\n%s", new_df.dtypes) pd.set_option('display.max_columns', None) logging.debug("head: :\n%s", new_df.head()) diff --git a/quotaclimat/data_processing/mediatree/keyword/keyword.py b/quotaclimat/data_processing/mediatree/keyword/keyword.py index 75bb013e..076e3097 100644 --- a/quotaclimat/data_processing/mediatree/keyword/keyword.py +++ b/quotaclimat/data_processing/mediatree/keyword/keyword.py @@ -1048,11 +1048,11 @@ }, { "keyword": "polluer", - "category": "general" + "category": "General" }, { "keyword": "pollution", - "category": "general" + "category": "General" }, { "keyword": "pollution de l’air", @@ -1060,7 +1060,7 @@ }, { "keyword": "rejets industriels", - "category": "industrie" + "category": "Industrie" }, { "keyword": "site minier", @@ -2986,7 +2986,7 @@ }, { "keyword": "relocaliser la production", - "category": "industrie" + "category": "Industrie" }, { "keyword": "rénovation", @@ -3016,10 +3016,6 @@ "keyword": "restreindre", "category": "General" }, - { - "keyword": "restreindre", - "category": "general" - }, { "keyword": "restriction", "category": "General" @@ -3416,7 +3412,7 @@ }, { "keyword": "usine", - "category": "industrie" + "category": "Industrie" }, { "keyword": "vache", @@ -4652,10 +4648,6 @@ "keyword": "norme", "category": "General" }, - { - "keyword": "norme", - "category": "General" - }, { "keyword": "parc", "category": "Ecosystème" @@ -4720,10 +4712,6 @@ "keyword": "restreindre", "category": "General" }, - { - "keyword": "restreindre", - "category": "general" - }, { "keyword": "restriction", "category": "General"