diff --git a/quotaclimat/data_processing/mediatree/keyword/keyword.py b/quotaclimat/data_processing/mediatree/keyword/keyword.py index c0ad2726c..90cf7a725 100644 --- a/quotaclimat/data_processing/mediatree/keyword/keyword.py +++ b/quotaclimat/data_processing/mediatree/keyword/keyword.py @@ -73,8 +73,6 @@ "gaz de schiste", "gaz naturel liquéfié", "or noir", - "forages pétroliers", - "puits de pétrole", "forage pétrolier", "puit de pétrole", "engrais azotés", @@ -109,8 +107,7 @@ "tempête xynthia", "la roya", "disparition des coraux", "disparition des récifs coralliens", "acidification des océans", - "évènements climatiques extrêmes", "aléas climatiques extrêmes", "évènements météorologiques extrêmes", "évènements extrêmes", "épisodes météorologiques extrêmes", - "épisodes climatiques extrêmes", "fortes pluies", "pluies extrêmes", "pluies intenses", "importantes précipitations", "inondations exceptionnelles", "crue centenaire", "cyclones", + "forte pluie", "pluie extrême", "pluie intense", "importantes précipitations", "inondations exceptionnelles", "crue centenaire", "cyclones", "mortalité forestière", "disparition des forêts" ,"glissement de terrain", "pénurie d’eau", @@ -149,7 +146,7 @@ "capture et séquestration de carbone", "séquestration du carbone" ,"absorption du carbone", "puits de carbone", "puits carbone" ,"industrie verte" - ,"agriculture biologique", "agroécologie", "polyculture élevage", "agroforesterie" + , "agroécologie", "polyculture élevage", "agroforesterie" ,"quotas de pêche" ,"stockage du carbone dans les sols", "réduction du cheptel", "agriculture de conservation" ,"restauration des tourbières", "protection des tourbières", "stockage du carbone dans les sols", "biochar", "restauration des zones humides" @@ -398,7 +395,7 @@ [ "corail", "coraux", - "récifs coralliens", + "récif corallien", "récif de corail", "barrière de corail", "espèce menacée", "espèce menacée d’extinction", "Extinction", "crise d'extinction de masse", "en voie de disparition", "Liste rouge de l'UICN" diff --git a/test/sitemap/test_detect_keywords.py b/test/sitemap/test_detect_keywords.py index 27b1e36f1..f1a8de25a 100644 --- a/test/sitemap/test_detect_keywords.py +++ b/test/sitemap/test_detect_keywords.py @@ -11,67 +11,71 @@ original_timestamp = 1706437079004 start = datetime.utcfromtimestamp(original_timestamp / 1000) -def test_get_themes_keywords_duration(): - - subtitles = [{ - "duration_ms": 34, - "cts_in_ms": original_timestamp, - "text": "gilets" - }, - { - "duration_ms": 34, - "cts_in_ms": original_timestamp + 6, - "text": "solaires" - }, - { - "duration_ms": 34, - "cts_in_ms": original_timestamp + 38, - "text": "jaunes" - }, - { - "duration_ms": 34, - "cts_in_ms": original_timestamp + 72, - "text": "économie" - }, - { - "duration_ms": 34, - "cts_in_ms": original_timestamp + 76, - "text": "circulaire" - }, - { - "duration_ms": 34, - "cts_in_ms": original_timestamp + 76, - "text": "abusive" - } - ] +subtitles = [{ + "duration_ms": 34, + "cts_in_ms": original_timestamp, + "text": "gilets" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 6, + "text": "solaires" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 38, + "text": "jaunes" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 72, + "text": "économie" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 76, + "text": "circulaire" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 76, + "text": "abusive" + } +] +def test_default_get_themes_keywords_duration(): plaintext_nothing = "cheese pizza" assert get_themes_keywords_duration(plaintext_nothing, subtitles, start) == [None,None, None] + +def test_one_theme_get_themes_keywords_duration(): plaintext_climat = "climatique test" assert get_themes_keywords_duration(plaintext_climat, subtitles, start) == [["changement_climatique_constat"],[], 0] + +def test_multiple_get_themes_keywords_duration(): plaintext_multiple_themes = "climatique test bovin migrations climatiques" assert get_themes_keywords_duration(plaintext_multiple_themes, subtitles, start) == [["changement_climatique_constat", "changement_climatique_consequences"],[], 0] +def test_nothing_get_themes_keywords_duration(): # should not accept theme 'bus' for keyword "abusive" plaintext_regression_incomplete_word = "abusive" assert get_themes_keywords_duration(plaintext_regression_incomplete_word, subtitles, start) == [None,None, None] +def test_regression_included_get_themes_keywords_duration(): # should not accept theme 'ngt' for keyword "vingt" plaintext_regression_incomplete_word_ngt = "vingt" assert get_themes_keywords_duration(plaintext_regression_incomplete_word_ngt, subtitles, start) == [None,None, None] +def test_three_get_themes_keywords_duration(): assert get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) == [[ "changement_climatique_constat" ,"changement_climatique_consequences" ,"adaptation_climatique_solutions_directes" ],[], 0] - - assert get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) == [[ - "changement_climatique_consequences" - ,"atténuation_climatique_solutions_directes" - ],[], 0] +def test_long_get_themes_keywords_duration(): + assert get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) == [ + ["adaptation_climatique_solutions_indirectes"],[], 0] def test_get_cts_in_ms_for_keywords(): str = [{ @@ -398,7 +402,7 @@ def test_is_word_in_sentence(): def test_format_word_regex(): assert format_word_regex("voitures") == "voitures?" assert format_word_regex("voiture") == "voitures?" - assert format_word_regex("coraux") == "coraux" + assert format_word_regex("coraux") == "coraux?" assert format_word_regex("d'eau") == "d' ?eaus?" assert format_word_regex("réseaux") == "réseaux?" diff --git a/test/sitemap/test_main_import_api.py b/test/sitemap/test_main_import_api.py index 9174017d7..5f3493a30 100644 --- a/test/sitemap/test_main_import_api.py +++ b/test/sitemap/test_main_import_api.py @@ -54,7 +54,7 @@ def test_second_row_api_import(): "changement_climatique_constat", "changement_climatique_causes_indirectes", "changement_climatique_consequences", - "atténuation_climatique_solutions_directes" + "adaptation_climatique_solutions_indirectes" ] assert specific_keyword.keywords_with_timestamp == [ # from metabase to speedup check @@ -69,15 +69,10 @@ def test_second_row_api_import(): "theme": "changement_climatique_constat" }, { - "keyword": "puits de pétrole", + "keyword": "puit de pétrole", "timestamp": 1707627628054, "theme": "changement_climatique_causes_indirectes" }, - { - "keyword": "pénurie", - "timestamp": 1707627683045, - "theme": "changement_climatique_consequences" - }, { "keyword": "submersion", "timestamp": 1707627611094, @@ -86,10 +81,10 @@ def test_second_row_api_import(): { "keyword": "barrage", "timestamp": 1707627686004, - "theme": "atténuation_climatique_solutions_directes" + "theme": "adaptation_climatique_solutions_indirectes" } ] - assert specific_keyword.number_of_keywords == 4 + assert specific_keyword.number_of_keywords == 3 def test_third_row_api_import(): primary_key = "975b41e76d298711cf55113a282e7f11c28157d761233838bb700253d47be262"