From ebcc4ea6e8204b2e65b5c8232d0d59d8a03f6dec Mon Sep 17 00:00:00 2001 From: RDiPiazza Date: Sun, 24 Nov 2024 11:45:34 +0100 Subject: [PATCH] fixed some issues of df formats with new columns --- .../mediatree/update_pg_keywords.py | 35 +++++++++++- test/sitemap/test_detect_keywords.py | 56 +++++++++++++++++-- 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/quotaclimat/data_processing/mediatree/update_pg_keywords.py b/quotaclimat/data_processing/mediatree/update_pg_keywords.py index 6058e594..d46bcfb8 100644 --- a/quotaclimat/data_processing/mediatree/update_pg_keywords.py +++ b/quotaclimat/data_processing/mediatree/update_pg_keywords.py @@ -50,7 +50,18 @@ def update_keywords(session: Session, batch_size: int = 50000, start_date : str ,number_of_biodiversite_solutions_directes \ ,new_number_of_keywords_climat \ ,new_number_of_keywords_biodiversite \ - ,new_number_of_keywords_ressources = get_themes_keywords_duration(plaintext, srt, start) + ,new_number_of_keywords_ressources \ + ,number_of_changement_climatique_constat_no_hrfp \ + ,number_of_changement_climatique_causes_no_hrfp \ + ,number_of_changement_climatique_consequences_no_hrfp \ + ,number_of_attenuation_climatique_solutions_no_hrfp \ + ,number_of_adaptation_climatique_solutions_no_hrfp \ + ,number_of_ressources_no_hrfp \ + ,number_of_ressources_solutions_no_hrfp \ + ,number_of_biodiversite_concepts_generaux_no_hrfp \ + ,number_of_biodiversite_causes_no_hrfp \ + ,number_of_biodiversite_consequences_no_hrfp \ + ,number_of_biodiversite_solutions_no_hrfp = get_themes_keywords_duration(plaintext, srt, start) except Exception as err: logging.error(f"continuing loop but met error : {err}") continue @@ -187,6 +198,17 @@ def update_keyword_row(session: Session, ,number_of_keywords_climat: int ,number_of_keywords_biodiversite: int ,number_of_keywords_ressources: int + ,number_of_changement_climatique_constat_no_hrfp: int, + number_of_changement_climatique_causes_directes_no_hrfp: int, + number_of_changement_climatique_consequences_no_hrfp: int, + number_of_attenuation_climatique_solutions_directes_no_hrfp: int, + number_of_adaptation_climatique_solutions_directes_no_hrfp: int, + number_of_ressources_no_hrfp: int, + number_of_ressources_solutions_no_hrfp: int, + number_of_biodiversite_concepts_generaux_no_hrfp: int, + number_of_biodiversite_causes_directes_no_hrfp: int, + number_of_biodiversite_consequences_no_hrfp: int, + number_of_biodiversite_solutions_directes_no_hrfp: int ): if matching_themes is not None: session.query(Keywords).filter(Keywords.id == keyword_id).update( @@ -209,6 +231,17 @@ def update_keyword_row(session: Session, ,Keywords.number_of_keywords_climat: number_of_keywords_climat ,Keywords.number_of_keywords_biodiversite: number_of_keywords_biodiversite ,Keywords.number_of_keywords_ressources: number_of_keywords_ressources + ,Keywords.number_of_changement_climatique_constat_no_hrfp:number_of_changement_climatique_constat_no_hrfp , + Keywords.number_of_changement_climatique_causes_directes_no_hrfp:number_of_changement_climatique_causes_directes_no_hrfp , + Keywords.number_of_changement_climatique_consequences_no_hrfp:number_of_changement_climatique_consequences_no_hrfp , + Keywords.number_of_attenuation_climatique_solutions_directes_no_hrfp:number_of_attenuation_climatique_solutions_directes_no_hrfp , + Keywords.number_of_adaptation_climatique_solutions_directes_no_hrfp:number_of_adaptation_climatique_solutions_directes_no_hrfp , + Keywords.number_of_ressources_no_hrfp:number_of_ressources_no_hrfp, + Keywords.number_of_ressources_solutions_no_hrfp:number_of_ressources_solutions_no_hrfp , + Keywords.number_of_biodiversite_concepts_generaux_no_hrfp:number_of_biodiversite_concepts_generaux_no_hrfp , + Keywords.number_of_biodiversite_causes_directes_no_hrfp:number_of_biodiversite_causes_directes_no_hrfp , + Keywords.number_of_biodiversite_consequences_no_hrfp:number_of_biodiversite_consequences_no_hrfp , + Keywords.number_of_biodiversite_solutions_directes_no_hrfp:number_of_biodiversite_solutions_directes_no_hrfp, }, synchronize_session=False ) diff --git a/test/sitemap/test_detect_keywords.py b/test/sitemap/test_detect_keywords.py index 7df14d11..a74d6453 100644 --- a/test/sitemap/test_detect_keywords.py +++ b/test/sitemap/test_detect_keywords.py @@ -108,7 +108,18 @@ def test_one_theme_get_themes_keywords_duration(): number_of_biodiversite_solutions_directes ,number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_directes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_directes_no_hrfp, + number_of_adaptation_climatique_solutions_directes_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_directes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_directes_no_hrfp) = get_themes_keywords_duration(plaintext_climat, subtitles, start) assert set(themes_output) == set(themes) # assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -127,6 +138,7 @@ def test_one_theme_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + #TODO: ADD TESTS FOR HRFP def test_long_sentence_theme_get_themes_keywords_duration(): conditions_ts = original_timestamp + 15000 @@ -220,7 +232,18 @@ def test_long_sentence_theme_get_themes_keywords_duration(): number_of_biodiversite_solutions_directes ,number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_directes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_directes_no_hrfp, + number_of_adaptation_climatique_solutions_directes_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_directes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_directes_no_hrfp) = get_themes_keywords_duration(plaintext_climat, subtitles, start) assert set(themes_output) == set(themes) # assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -237,6 +260,7 @@ def test_long_sentence_theme_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + #TODO: ADD TESTS FOR HRFP def test_nothing_get_themes_keywords_duration(): # should not accept theme 'bus' for keyword "abusive" @@ -280,7 +304,18 @@ def test_three_get_themes_keywords_duration(): number_of_biodiversite_solutions_directes ,number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) + number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_directes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_directes_no_hrfp, + number_of_adaptation_climatique_solutions_directes_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_directes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_directes_no_hrfp) = get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) assert set(themes_output)== themes #assert keywords_output == keywords @@ -296,6 +331,7 @@ def test_three_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + #TODO: ADD TESTS FOR HRFP def test_long_get_themes_keywords_duration(): themes= set([ @@ -338,7 +374,18 @@ def test_long_get_themes_keywords_duration(): number_of_biodiversite_solutions_directes ,number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) + number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_directes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_directes_no_hrfp, + number_of_adaptation_climatique_solutions_directes_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_directes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_directes_no_hrfp) = get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) assert set(themes_output) == set(themes) # too hard to maintain #assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -354,6 +401,7 @@ def test_long_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + #TODO: ADD TESTS FOR HRFP def test_stop_word_get_themes_keywords_duration(): plaintext = "haute isolation thermique fabriqué en france pizza"