Skip to content

Commit

Permalink
test: keyword updated
Browse files (browse the repository at this point in the history)
  • Loading branch information
polomarcus committed Mar 1, 2024
1 parent a821795 commit 8701749
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 54 deletions.
9 changes: 3 additions & 6 deletions quotaclimat/data_processing/mediatree/keyword/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@
"gaz de schiste",
"gaz naturel liquéfié",
"or noir",
"forages pétroliers",
"puits de pétrole",
"forage pétrolier",
"puit de pétrole",
"engrais azotés",
Expand Down Expand Up @@ -109,8 +107,7 @@
"tempête xynthia", "la roya",
"disparition des coraux", "disparition des récifs coralliens",
"acidification des océans",
"évènements climatiques extrêmes", "aléas climatiques extrêmes", "évènements météorologiques extrêmes", "évènements extrêmes", "épisodes météorologiques extrêmes",
"épisodes climatiques extrêmes", "fortes pluies", "pluies extrêmes", "pluies intenses", "importantes précipitations", "inondations exceptionnelles", "crue centenaire", "cyclones",
"forte pluie", "pluie extrême", "pluie intense", "importantes précipitations", "inondations exceptionnelles", "crue centenaire", "cyclones",
"mortalité forestière", "disparition des forêts"
,"glissement de terrain",
"pénurie d’eau",
Expand Down Expand Up @@ -149,7 +146,7 @@
"capture et séquestration de carbone", "séquestration du carbone"
,"absorption du carbone", "puits de carbone", "puits carbone"
,"industrie verte"
,"agriculture biologique", "agroécologie", "polyculture élevage", "agroforesterie"
, "agroécologie", "polyculture élevage", "agroforesterie"
,"quotas de pêche"
,"stockage du carbone dans les sols", "réduction du cheptel", "agriculture de conservation"
,"restauration des tourbières", "protection des tourbières", "stockage du carbone dans les sols", "biochar", "restauration des zones humides"
Expand Down Expand Up @@ -398,7 +395,7 @@
[
"corail",
"coraux",
"récifs coralliens",
"récif corallien",
"récif de corail",
"barrière de corail",
"espèce menacée", "espèce menacée d’extinction", "Extinction", "crise d'extinction de masse", "en voie de disparition", "Liste rouge de l'UICN"
Expand Down
82 changes: 43 additions & 39 deletions test/sitemap/test_detect_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,67 +11,71 @@
original_timestamp = 1706437079004
start = datetime.utcfromtimestamp(original_timestamp / 1000)

def test_get_themes_keywords_duration():

subtitles = [{
"duration_ms": 34,
"cts_in_ms": original_timestamp,
"text": "gilets"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 6,
"text": "solaires"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 38,
"text": "jaunes"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 72,
"text": "économie"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 76,
"text": "circulaire"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 76,
"text": "abusive"
}
]

subtitles = [{
"duration_ms": 34,
"cts_in_ms": original_timestamp,
"text": "gilets"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 6,
"text": "solaires"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 38,
"text": "jaunes"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 72,
"text": "économie"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 76,
"text": "circulaire"
},
{
"duration_ms": 34,
"cts_in_ms": original_timestamp + 76,
"text": "abusive"
}
]
def test_default_get_themes_keywords_duration():
plaintext_nothing = "cheese pizza"
assert get_themes_keywords_duration(plaintext_nothing, subtitles, start) == [None,None, None]

def test_one_theme_get_themes_keywords_duration():
plaintext_climat = "climatique test"
assert get_themes_keywords_duration(plaintext_climat, subtitles, start) == [["changement_climatique_constat"],[], 0]

def test_multiple_get_themes_keywords_duration():
plaintext_multiple_themes = "climatique test bovin migrations climatiques"
assert get_themes_keywords_duration(plaintext_multiple_themes, subtitles, start) == [["changement_climatique_constat", "changement_climatique_consequences"],[], 0]

def test_nothing_get_themes_keywords_duration():
# should not accept theme 'bus' for keyword "abusive"
plaintext_regression_incomplete_word = "abusive"
assert get_themes_keywords_duration(plaintext_regression_incomplete_word, subtitles, start) == [None,None, None]

def test_regression_included_get_themes_keywords_duration():
# should not accept theme 'ngt' for keyword "vingt"
plaintext_regression_incomplete_word_ngt = "vingt"
assert get_themes_keywords_duration(plaintext_regression_incomplete_word_ngt, subtitles, start) == [None,None, None]


def test_three_get_themes_keywords_duration():
assert get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) == [[
"changement_climatique_constat"
,"changement_climatique_consequences"
,"adaptation_climatique_solutions_directes"
],[], 0]


assert get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) == [[
"changement_climatique_consequences"
,"atténuation_climatique_solutions_directes"
],[], 0]
def test_long_get_themes_keywords_duration():
assert get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) == [
["adaptation_climatique_solutions_indirectes"],[], 0]

def test_get_cts_in_ms_for_keywords():
str = [{
Expand Down Expand Up @@ -398,7 +402,7 @@ def test_is_word_in_sentence():
def test_format_word_regex():
assert format_word_regex("voitures") == "voitures?"
assert format_word_regex("voiture") == "voitures?"
assert format_word_regex("coraux") == "coraux"
assert format_word_regex("coraux") == "coraux?"
assert format_word_regex("d'eau") == "d' ?eaus?"
assert format_word_regex("réseaux") == "réseaux?"

Expand Down
13 changes: 4 additions & 9 deletions test/sitemap/test_main_import_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_second_row_api_import():
"changement_climatique_constat",
"changement_climatique_causes_indirectes",
"changement_climatique_consequences",
"atténuation_climatique_solutions_directes"
"adaptation_climatique_solutions_indirectes"
]

assert specific_keyword.keywords_with_timestamp == [ # from metabase to speedup check
Expand All @@ -69,15 +69,10 @@ def test_second_row_api_import():
"theme": "changement_climatique_constat"
},
{
"keyword": "puits de pétrole",
"keyword": "puit de pétrole",
"timestamp": 1707627628054,
"theme": "changement_climatique_causes_indirectes"
},
{
"keyword": "pénurie",
"timestamp": 1707627683045,
"theme": "changement_climatique_consequences"
},
{
"keyword": "submersion",
"timestamp": 1707627611094,
Expand All @@ -86,10 +81,10 @@ def test_second_row_api_import():
{
"keyword": "barrage",
"timestamp": 1707627686004,
"theme": "atténuation_climatique_solutions_directes"
"theme": "adaptation_climatique_solutions_indirectes"
}
]
assert specific_keyword.number_of_keywords == 4
assert specific_keyword.number_of_keywords == 3

def test_third_row_api_import():
primary_key = "975b41e76d298711cf55113a282e7f11c28157d761233838bb700253d47be262"
Expand Down

1 comment on commit 8701749

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| **postgres** | | | | |
|    insert_data.py | 44 | 7 | 84% | 37–39, 58–60, 65 |
|    insert_existing_data_example.py | 20 | 3 | 85% | 25–27 |
| **postgres/schemas** | | | | |
|    models.py | 72 | 15 | 79% | 74–81, 91–92, 101–111 |
| **quotaclimat/data_analytics** | | | | |
|    analytics_signataire_charte.py | 29 | 29 | 0% | 1–67 |
|    bilan.py | 108 | 108 | 0% | 2–372 |
|    data_coverage.py | 34 | 34 | 0% | 1–94 |
|    exploration.py | 125 | 125 | 0% | 1–440 |
|    sitemap_analytics.py | 118 | 118 | 0% | 1–343 |
| **quotaclimat/data_ingestion** | | | | |
|    categorization_program_type.py | 1 | 1 | 0% | 1 |
|    config_youtube.py | 1 | 1 | 0% | 1 |
|    scaleway_db_backups.py | 34 | 34 | 0% | 1–74 |
|    scrap_chartejournalismeecologie_signataires.py | 50 | 50 | 0% | 1–169 |
|    scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228 |
|    scrap_tv_program.py | 62 | 62 | 0% | 1–149 |
|    scrap_youtube.py | 114 | 114 | 0% | 1–238 |
| **quotaclimat/data_ingestion/ingest_db** | | | | |
|    ingest_sitemap_in_db.py | 59 | 41 | 31% | 21–42, 45–65, 69–80 |
| **quotaclimat/data_ingestion/scrap_html** | | | | |
|    scrap_description_article.py | 36 | 3 | 92% | 19–20, 32 |
| **quotaclimat/data_processing/mediatree** | | | | |
|    api_import.py | 177 | 103 | 42% | 38–42, 47–53, 57–60, 66, 69–96, 102–117, 122–124, 149–161, 165–168, 172–178, 189–200, 203–207, 213, 237–238, 242, 246–265, 268–270 |
|    config.py | 15 | 2 | 87% | 7, 16 |
|    detect_keywords.py | 145 | 6 | 96% | 77–79, 167–169 |
|    utils.py | 66 | 22 | 67% | 19, 30–54, 57, 76–77 |
| **quotaclimat/data_processing/sitemap** | | | | |
|    sitemap_processing.py | 41 | 27 | 34% | 15–19, 23–25, 29–47, 51–58, 66–96, 101–103 |
| **quotaclimat/utils** | | | | |
|    channels.py | 6 | 6 | 0% | 1–95 |
|    climate_keywords.py | 2 | 2 | 0% | 3–35 |
|    healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38 |
|    logger.py | 24 | 11 | 54% | 22–24, 28–37 |
|    plotly_theme.py | 17 | 17 | 0% | 1–56 |
|    sentry.py | 10 | 2 | 80% | 21–22 |
| **TOTAL** | 1625 | 974 | 40% | |

| Tests | Skipped | Failures | Errors | Time |
|---|---|---|---|---|
| 63 | 0 💤 | 0 ❌ | 0 🔥 | 52.998s ⏱️ |

Please sign in to comment.