From 6ac72c80791d76de782e38fca0724342454f4756 Mon Sep 17 00:00:00 2001 From: Paul Leclercq Date: Tue, 15 Oct 2024 17:28:35 +0200 Subject: [PATCH] Data quality: normalize channel title (#271) * wip * test: add real tests --- README.md | 1 + quotaclimat/data_processing/mediatree/api_import.py | 3 +++ test/sitemap/test_mediatree.py | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 10c222707..250e23446 100644 --- a/README.md +++ b/README.md @@ -399,3 +399,4 @@ There is a debt regarding the cleanest of the code right now. Let's just not mak ## Thanks * [Eleven-Strategy](https://www.welcometothejungle.com/fr/companies/eleven-strategy) +* [Kevin Tessier](https://kevintessier.fr) \ No newline at end of file diff --git a/quotaclimat/data_processing/mediatree/api_import.py b/quotaclimat/data_processing/mediatree/api_import.py index 2cdda10a3..770e9cc9a 100644 --- a/quotaclimat/data_processing/mediatree/api_import.py +++ b/quotaclimat/data_processing/mediatree/api_import.py @@ -255,6 +255,9 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c inplace=True ) + logging.debug("setting channel_title") + new_df['channel_title'] = new_df.apply(lambda x: get_channel_title_for_name(x['channel_name']), axis=1) + logging.debug(f"setting program {channel_program}") # weird error if not using this way: (ValueError) format number 1 of "20h30 le samedi" is not recognized new_df['channel_program'] = new_df.apply(lambda x: channel_program, axis=1) diff --git a/test/sitemap/test_mediatree.py b/test/sitemap/test_mediatree.py index c1a9e24c1..53ec7bec8 100644 --- a/test/sitemap/test_mediatree.py +++ b/test/sitemap/test_mediatree.py @@ -41,7 +41,7 @@ "text": "france" } ], - "channel":{"name":"m6","title":"M6","radio":false},"start":1704798000, + "channel":{"name":"m6","title":"fake m6","radio":false},"start":1704798000, "plaintext":"test1" }, { @@ -51,7 +51,7 @@ "text": "adaptation" } ], - "channel":{"name":"tf1","title":"TF1","radio":false},"start":1704798120, + "channel":{"name":"tf1","title":"fake TF1","radio":false},"start":1704798120, "plaintext":"test2"} ], "elapsed_time_ms":335}