diff --git a/alembic/versions/ac96222af6fe_hrfp_counters.py b/alembic/versions/ac96222af6fe_hrfp_counters.py new file mode 100644 index 00000000..9d4ca281 --- /dev/null +++ b/alembic/versions/ac96222af6fe_hrfp_counters.py @@ -0,0 +1,50 @@ +"""hrfp counters + +Revision ID: ac96222af6fe +Revises: 30abfd828007 +Create Date: 2024-12-02 14:36:21.970968 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'ac96222af6fe' +down_revision: Union[str, None] = '30abfd828007' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('keywords', sa.Column('number_of_changement_climatique_constat_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_changement_climatique_causes_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_changement_climatique_consequences_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_attenuation_climatique_solutions_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_adaptation_climatique_solutions_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_ressources_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_ressources_solutions_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_biodiversite_concepts_generaux_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_biodiversite_causes_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_biodiversite_consequences_no_hrfp', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_biodiversite_solutions_no_hrfp', sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('keywords', 'number_of_biodiversite_solutions_no_hrfp') + op.drop_column('keywords', 'number_of_biodiversite_consequences_no_hrfp') + op.drop_column('keywords', 'number_of_biodiversite_causes_no_hrfp') + op.drop_column('keywords', 'number_of_biodiversite_concepts_generaux_no_hrfp') + op.drop_column('keywords', 'number_of_ressources_solutions_no_hrfp') + op.drop_column('keywords', 'number_of_ressources_no_hrfp') + op.drop_column('keywords', 'number_of_adaptation_climatique_solutions_no_hrfp') + op.drop_column('keywords', 'number_of_attenuation_climatique_solutions_no_hrfp') + op.drop_column('keywords', 'number_of_changement_climatique_consequences_no_hrfp') + op.drop_column('keywords', 'number_of_changement_climatique_causes_no_hrfp') + op.drop_column('keywords', 'number_of_changement_climatique_constat_no_hrfp') + # ### end Alembic commands ### diff --git a/postgres/schemas/models.py b/postgres/schemas/models.py index ebc939a0..1f48613e 100644 --- a/postgres/schemas/models.py +++ b/postgres/schemas/models.py @@ -87,6 +87,17 @@ class Keywords(Base): number_of_keywords_climat = Column(Integer) # sum of all climatique counters without duplicate (like number_of_keywords) number_of_keywords_biodiversite = Column(Integer) # sum of all biodiversite counters without duplicate number_of_keywords_ressources = Column(Integer) # sum of all ressources counters without duplicate + number_of_changement_climatique_constat_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_changement_climatique_constat integer; + number_of_changement_climatique_causes_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_changement_climatique_causes_directes integer; + number_of_changement_climatique_consequences_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_changement_climatique_consequences integer; + number_of_attenuation_climatique_solutions_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_attenuation_climatique_solutions_directes integer; + number_of_adaptation_climatique_solutions_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_adaptation_climatique_solutions_directes integer; + number_of_ressources_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_ressources_naturelles_concepts_generaux integer; + number_of_ressources_solutions_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_ressources_solutions integer; + number_of_biodiversite_concepts_generaux_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_concepts_generaux integer; + number_of_biodiversite_causes_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_causes_directes integer; + number_of_biodiversite_consequences_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_consequences integer; + number_of_biodiversite_solutions_no_hrfp= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_solutions_directes integer; class Channel_Metadata(Base): __tablename__ = channel_metadata_table diff --git a/quotaclimat/data_processing/mediatree/detect_keywords.py b/quotaclimat/data_processing/mediatree/detect_keywords.py index 9f2be80a..04007005 100644 --- a/quotaclimat/data_processing/mediatree/detect_keywords.py +++ b/quotaclimat/data_processing/mediatree/detect_keywords.py @@ -131,7 +131,7 @@ def remove_stopwords(plaintext: str) -> str: @sentry_sdk.trace def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], start: datetime): keywords_with_timestamp = [] - number_of_elements_in_array = 17 + number_of_elements_in_array = 28 default_window_in_seconds = DEFAULT_WINDOW_DURATION plaitext_without_stopwords = remove_stopwords(plaintext) logging.debug(f"display datetime start {start}") @@ -192,8 +192,32 @@ def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], s number_of_biodiversite_causes = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_causes"]) number_of_biodiversite_consequences = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_consequences"]) number_of_biodiversite_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_solutions"]) - - return [ + + # No high risk of false positive counters + number_of_changement_climatique_constat_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["changement_climatique_constat"], \ + count_high_risk_false_positive=False) + number_of_changement_climatique_causes_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["changement_climatique_causes"], \ + count_high_risk_false_positive=False) + number_of_changement_climatique_consequences_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["changement_climatique_consequences"], \ + count_high_risk_false_positive=False) + number_of_attenuation_climatique_solutions_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["attenuation_climatique_solutions"], \ + count_high_risk_false_positive=False) + number_of_adaptation_climatique_solutions_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["adaptation_climatique_solutions"], \ + count_high_risk_false_positive=False) + number_of_ressources_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["ressources"], \ + count_high_risk_false_positive=False) + number_of_ressources_solutions_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["ressources_solutions"], \ + count_high_risk_false_positive=False) + number_of_biodiversite_concepts_generaux_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_concepts_generaux"], \ + count_high_risk_false_positive=False) + number_of_biodiversite_causes_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_causes"], \ + count_high_risk_false_positive=False) + number_of_biodiversite_consequences_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_consequences"], \ + count_high_risk_false_positive=False) + number_of_biodiversite_solutions_no_hrfp = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_solutions"], \ + count_high_risk_false_positive=False) + + return [ # Change number_of_elements_in_array if a new element is added here theme ,keywords_with_timestamp ,number_of_keywords @@ -211,8 +235,20 @@ def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], s ,number_of_keywords_climat ,number_of_keywords_biodiversite ,number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp + ,number_of_changement_climatique_causes_no_hrfp + ,number_of_changement_climatique_consequences_no_hrfp + ,number_of_attenuation_climatique_solutions_no_hrfp + ,number_of_adaptation_climatique_solutions_no_hrfp + ,number_of_ressources_no_hrfp + ,number_of_ressources_solutions_no_hrfp + ,number_of_biodiversite_concepts_generaux_no_hrfp + ,number_of_biodiversite_causes_no_hrfp + ,number_of_biodiversite_consequences_no_hrfp + ,number_of_biodiversite_solutions_no_hrfp ] else: + logging.info("Empty keywords") return [None] * number_of_elements_in_array def get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds: int = 20): @@ -274,6 +310,17 @@ def filter_and_tag_by_theme(df: pd.DataFrame) -> pd.DataFrame : ,"number_of_keywords_climat" ,"number_of_keywords_biodiversite" ,"number_of_keywords_ressources" + ,"number_of_changement_climatique_constat_no_hrfp" + ,"number_of_changement_climatique_causes_no_hrfp" + ,"number_of_changement_climatique_consequences_no_hrfp" + ,"number_of_attenuation_climatique_solutions_no_hrfp" + ,"number_of_adaptation_climatique_solutions_no_hrfp" + ,"number_of_ressources_no_hrfp" + ,"number_of_ressources_solutions_no_hrfp" + ,"number_of_biodiversite_concepts_generaux_no_hrfp" + ,"number_of_biodiversite_causes_no_hrfp" + ,"number_of_biodiversite_consequences_no_hrfp" + ,"number_of_biodiversite_solutions_no_hrfp" ] ] = df[['plaintext','srt', 'start']]\ .swifter.apply(\ @@ -282,9 +329,10 @@ def filter_and_tag_by_theme(df: pd.DataFrame) -> pd.DataFrame : result_type='expand' ) + logging.info("Dropping") # remove all rows that does not have themes df = df.dropna(subset=['theme'], how='any') # any is for None values - + logging.info("Droped") logging.info(f"After filtering with out keywords, we have {len(df)} out of {count_before_filtering} subtitles left that are insteresting for us") return df @@ -302,15 +350,21 @@ def add_primary_key(row): def filter_indirect_words(keywords_with_timestamp: List[dict]) -> List[dict]: return list(filter(lambda kw: indirectes not in kw['theme'], keywords_with_timestamp)) -def count_keywords_duration_overlap(keywords_with_timestamp: List[dict], start: datetime, theme: List[str] = None) -> int: +def filter_high_risk_false_positive(keywords_with_timestamp: List[dict]) -> List[dict]: + return list(filter(lambda kw: 'hrfp' not in kw, keywords_with_timestamp)) + +def count_keywords_duration_overlap(keywords_with_timestamp: List[dict], start: datetime, theme: List[str] = None, count_high_risk_false_positive: bool = True) -> int: total_keywords = len(keywords_with_timestamp) if(total_keywords) == 0: return 0 else: + logging.debug(f"keywords_with_timestamp is {keywords_with_timestamp}") if theme is not None: logging.debug(f"filter theme {theme}") keywords_with_timestamp = list(filter(lambda kw: kw['theme'] in theme, keywords_with_timestamp)) - + if count_high_risk_false_positive is False: + keywords_with_timestamp = filter_high_risk_false_positive(keywords_with_timestamp) + logging.debug(f"keywords_with_timestamp is after filtering {keywords_with_timestamp}") length_filtered_items = len(keywords_with_timestamp) if length_filtered_items > 0: @@ -357,7 +411,9 @@ def transform_false_positive_keywords_to_positive(keywords_with_timestamp: List[ if( contains_direct_keywords_same_suject(neighbour_keywords, keyword_info['theme']) ) : logging.debug(f"Transforming false positive to positive { keyword_info['keyword']} { keyword_info['theme']}") - keyword_info['theme'] = remove_indirect(keyword_info['theme']) + if indirectes in keyword_info['theme']: + keyword_info['theme'] = remove_indirect(keyword_info['theme']) + keyword_info['hrfp'] = True # to store if a keyword was a transformed to a direct keyword return keywords_with_timestamp @@ -383,7 +439,4 @@ def tag_wanted_duration_second_window_number(keywords_with_timestamp: List[dict] return keywords_with_timestamp def remove_indirect(theme: str) -> str: - if indirectes in theme: - return theme.replace(f'_{indirectes}', '') - else: - return theme + return theme.replace(f'_{indirectes}', '') \ No newline at end of file diff --git a/quotaclimat/data_processing/mediatree/update_pg_keywords.py b/quotaclimat/data_processing/mediatree/update_pg_keywords.py index d4fe0e89..ffbdbe70 100644 --- a/quotaclimat/data_processing/mediatree/update_pg_keywords.py +++ b/quotaclimat/data_processing/mediatree/update_pg_keywords.py @@ -50,7 +50,18 @@ def update_keywords(session: Session, batch_size: int = 50000, start_date : str ,number_of_biodiversite_solutions_directes \ ,new_number_of_keywords_climat \ ,new_number_of_keywords_biodiversite \ - ,new_number_of_keywords_ressources = get_themes_keywords_duration(plaintext, srt, start) + ,new_number_of_keywords_ressources \ + ,number_of_changement_climatique_constat_no_hrfp \ + ,number_of_changement_climatique_causes_no_hrfp \ + ,number_of_changement_climatique_consequences_no_hrfp \ + ,number_of_attenuation_climatique_solutions_no_hrfp \ + ,number_of_adaptation_climatique_solutions_no_hrfp \ + ,number_of_ressources_no_hrfp \ + ,number_of_ressources_solutions_no_hrfp \ + ,number_of_biodiversite_concepts_generaux_no_hrfp \ + ,number_of_biodiversite_causes_no_hrfp \ + ,number_of_biodiversite_consequences_no_hrfp \ + ,number_of_biodiversite_solutions_no_hrfp = get_themes_keywords_duration(plaintext, srt, start) except Exception as err: logging.error(f"continuing loop but met error : {err}") continue @@ -83,10 +94,21 @@ def update_keywords(session: Session, batch_size: int = 50000, start_date : str ,number_of_biodiversite_causes_directes ,number_of_biodiversite_consequences ,number_of_biodiversite_solutions_directes - ,channel_title=channel_title - ,number_of_keywords_climat=new_number_of_keywords_climat - ,number_of_keywords_biodiversite=new_number_of_keywords_biodiversite - ,number_of_keywords_ressources=new_number_of_keywords_ressources + ,channel_title + ,new_number_of_keywords_climat + ,new_number_of_keywords_biodiversite + ,new_number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp + ,number_of_changement_climatique_causes_no_hrfp + ,number_of_changement_climatique_consequences_no_hrfp + ,number_of_attenuation_climatique_solutions_no_hrfp + ,number_of_adaptation_climatique_solutions_no_hrfp + ,number_of_ressources_no_hrfp + ,number_of_ressources_solutions_no_hrfp + ,number_of_biodiversite_concepts_generaux_no_hrfp + ,number_of_biodiversite_causes_no_hrfp + ,number_of_biodiversite_consequences_no_hrfp + ,number_of_biodiversite_solutions_no_hrfp ) else: # Program only mode logging.info(f"Updating program for keyword {keyword_id} - {channel_name} - original tz : {start}") @@ -176,6 +198,17 @@ def update_keyword_row(session: Session, ,number_of_keywords_climat: int ,number_of_keywords_biodiversite: int ,number_of_keywords_ressources: int + ,number_of_changement_climatique_constat_no_hrfp: int, + number_of_changement_climatique_causes_no_hrfp: int, + number_of_changement_climatique_consequences_no_hrfp: int, + number_of_attenuation_climatique_solutions_no_hrfp: int, + number_of_adaptation_climatique_solutions_no_hrfp: int, + number_of_ressources_no_hrfp: int, + number_of_ressources_solutions_no_hrfp: int, + number_of_biodiversite_concepts_generaux_no_hrfp: int, + number_of_biodiversite_causes_no_hrfp: int, + number_of_biodiversite_consequences_no_hrfp: int, + number_of_biodiversite_solutions_no_hrfp: int ): if matching_themes is not None: session.query(Keywords).filter(Keywords.id == keyword_id).update( @@ -198,6 +231,17 @@ def update_keyword_row(session: Session, ,Keywords.number_of_keywords_climat: number_of_keywords_climat ,Keywords.number_of_keywords_biodiversite: number_of_keywords_biodiversite ,Keywords.number_of_keywords_ressources: number_of_keywords_ressources + ,Keywords.number_of_changement_climatique_constat_no_hrfp:number_of_changement_climatique_constat_no_hrfp , + Keywords.number_of_changement_climatique_causes_no_hrfp:number_of_changement_climatique_causes_no_hrfp , + Keywords.number_of_changement_climatique_consequences_no_hrfp:number_of_changement_climatique_consequences_no_hrfp , + Keywords.number_of_attenuation_climatique_solutions_no_hrfp:number_of_attenuation_climatique_solutions_no_hrfp , + Keywords.number_of_adaptation_climatique_solutions_no_hrfp:number_of_adaptation_climatique_solutions_no_hrfp , + Keywords.number_of_ressources_no_hrfp:number_of_ressources_no_hrfp, + Keywords.number_of_ressources_solutions_no_hrfp:number_of_ressources_solutions_no_hrfp , + Keywords.number_of_biodiversite_concepts_generaux_no_hrfp:number_of_biodiversite_concepts_generaux_no_hrfp , + Keywords.number_of_biodiversite_causes_no_hrfp:number_of_biodiversite_causes_no_hrfp , + Keywords.number_of_biodiversite_consequences_no_hrfp:number_of_biodiversite_consequences_no_hrfp , + Keywords.number_of_biodiversite_solutions_no_hrfp:number_of_biodiversite_solutions_no_hrfp, }, synchronize_session=False ) diff --git a/secrets/.empty b/secrets/.empty new file mode 100644 index 00000000..e69de29b diff --git a/test/sitemap/test_detect_keywords.py b/test/sitemap/test_detect_keywords.py index 7df14d11..383a23d7 100644 --- a/test/sitemap/test_detect_keywords.py +++ b/test/sitemap/test_detect_keywords.py @@ -4,13 +4,14 @@ from quotaclimat.data_processing.mediatree.utils import * from quotaclimat.data_processing.mediatree.detect_keywords import * - +from datetime import datetime, timezone import pandas as pd localhost = get_localhost() original_timestamp = 1706437079004 -start = datetime.utcfromtimestamp(original_timestamp / 1000) +start = datetime.fromtimestamp(original_timestamp / 1000, timezone.utc) +array_of_none = [None] * 28 subtitles = [{ "duration_ms": 34, @@ -76,21 +77,32 @@ "duration_ms": 34, "cts_in_ms": original_timestamp + 1300, "text": "planétaire" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 1500, + "text": "recyclage" + }, + { + "duration_ms": 34, + "cts_in_ms": original_timestamp + 1800, + "text": "climatique" } ] -def test_default_get_themes_keywords_duration(): - plaintext_nothing = "cheese pizza" - assert get_themes_keywords_duration(plaintext_nothing, subtitles, start) == [None] * 17 - -def test_one_theme_get_themes_keywords_duration(): - plaintext_climat = "réchauffement planétaire test" - keywords = [{ +keywords = [{ 'category': 'General', 'keyword': 'réchauffement planétaire', 'theme': 'changement_climatique_constat', 'timestamp': 1706437080216, } ] +def test_default_get_themes_keywords_duration(): + plaintext_nothing = "cheese pizza" + assert get_themes_keywords_duration(plaintext_nothing, subtitles, start) == array_of_none + +def test_one_theme_get_themes_keywords_duration(): + plaintext_climat = "réchauffement planétaire test" + themes = ['changement_climatique_constat', 'ressources_indirectes'] (themes_output, keywords_output, @@ -106,9 +118,20 @@ def test_one_theme_get_themes_keywords_duration(): number_of_biodiversite_causes_directes, number_of_biodiversite_consequences, number_of_biodiversite_solutions_directes - ,number_of_keywords_climat, + , number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + number_of_keywords_ressources + , number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_no_hrfp, + number_of_adaptation_climatique_solutions_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_no_hrfp) = get_themes_keywords_duration(plaintext_climat, subtitles, start) assert set(themes_output) == set(themes) # assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -128,6 +151,94 @@ def test_one_theme_get_themes_keywords_duration(): assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + assert number_of_changement_climatique_constat_no_hrfp == 1 + assert number_of_changement_climatique_causes_no_hrfp == 0 + assert number_of_changement_climatique_consequences_no_hrfp == 0 + assert number_of_attenuation_climatique_solutions_no_hrfp == 0 + assert number_of_adaptation_climatique_solutions_no_hrfp == 0 + assert number_of_ressources_no_hrfp == 0 + assert number_of_ressources_solutions_no_hrfp == 0 + assert number_of_biodiversite_concepts_generaux_no_hrfp == 0 + assert number_of_biodiversite_causes_no_hrfp == 0 + assert number_of_biodiversite_consequences_no_hrfp == 0 + assert number_of_biodiversite_solutions_no_hrfp == 0 + +def test_two_themes_one_hrfp_get_themes_keywords_duration(): + plaintext_climat = "pizza recyclage climatique pizza" + original_timestamp = 1706437080216 + keywords_with_timestamp = [ + { + 'keyword': 'recyclage', + 'timestamp': original_timestamp, + 'theme': 'attenuation_climatique_solutions_indirectes' # should be transformed to direct + }, + {'keyword': 'climatique', + 'timestamp': original_timestamp + 150, + 'theme': 'changement_climatique_constat' + } + ] + themes = ['changement_climatique_constat', 'attenuation_climatique_solutions', 'ressources_solutions'] + + (themes_output, keywords_output, + number_of_keywords, + number_of_changement_climatique_constat, + number_of_changement_climatique_causes_directes, + number_of_changement_climatique_consequences, + number_of_attenuation_climatique_solutions_directes, + number_of_adaptation_climatique_solutions_directes, + number_of_ressources, + number_of_ressources_solutions, + number_of_biodiversite_concepts_generaux, + number_of_biodiversite_causes_directes, + number_of_biodiversite_consequences, + number_of_biodiversite_solutions_directes + , number_of_keywords_climat, + number_of_keywords_biodiversite, + number_of_keywords_ressources + , number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_no_hrfp, + number_of_adaptation_climatique_solutions_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_no_hrfp) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + + logging.info(f"Test got keywords_output: {keywords_output}") + assert set(themes_output) == set(themes) + # assert compare_unordered_lists_of_dicts(keywords_output, keywords) + + assert number_of_keywords == 1 + assert number_of_keywords_climat == 1 + assert number_of_keywords_biodiversite == 0 + assert number_of_keywords_ressources == 1 + assert number_of_changement_climatique_constat == 1 + assert number_of_changement_climatique_causes_directes == 0 + assert number_of_changement_climatique_consequences == 0 + assert number_of_attenuation_climatique_solutions_directes == 1 + assert number_of_adaptation_climatique_solutions_directes == 0 + assert number_of_ressources == 0 + assert number_of_ressources_solutions == 1 + assert number_of_biodiversite_concepts_generaux == 0 + assert number_of_biodiversite_causes_directes == 0 + assert number_of_biodiversite_consequences == 0 + assert number_of_biodiversite_solutions_directes == 0 + assert number_of_changement_climatique_constat_no_hrfp == 1 + assert number_of_changement_climatique_causes_no_hrfp == 0 + assert number_of_changement_climatique_consequences_no_hrfp == 0 + assert number_of_attenuation_climatique_solutions_no_hrfp == 1 + assert number_of_adaptation_climatique_solutions_no_hrfp == 0 + assert number_of_ressources_no_hrfp == 0 + assert number_of_ressources_solutions_no_hrfp == 1 + assert number_of_biodiversite_concepts_generaux_no_hrfp == 0 + assert number_of_biodiversite_causes_no_hrfp == 0 + assert number_of_biodiversite_consequences_no_hrfp == 0 + assert number_of_biodiversite_solutions_no_hrfp == 0 + + def test_long_sentence_theme_get_themes_keywords_duration(): conditions_ts = original_timestamp + 15000 habitabilite_ts = original_timestamp + 6 @@ -218,9 +329,20 @@ def test_long_sentence_theme_get_themes_keywords_duration(): number_of_biodiversite_causes_directes, number_of_biodiversite_consequences, number_of_biodiversite_solutions_directes - ,number_of_keywords_climat, + , number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + number_of_keywords_ressources + , number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_no_hrfp, + number_of_adaptation_climatique_solutions_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_no_hrfp) = get_themes_keywords_duration(plaintext_climat, subtitles, start) assert set(themes_output) == set(themes) # assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -237,30 +359,48 @@ def test_long_sentence_theme_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + assert number_of_changement_climatique_constat_no_hrfp == 1 + assert number_of_changement_climatique_causes_no_hrfp == 0 + assert number_of_changement_climatique_consequences_no_hrfp == 0 + assert number_of_attenuation_climatique_solutions_no_hrfp == 0 + assert number_of_adaptation_climatique_solutions_no_hrfp == 0 + assert number_of_ressources_no_hrfp == 0 + assert number_of_ressources_solutions_no_hrfp == 0 + assert number_of_biodiversite_concepts_generaux_no_hrfp == 0 + assert number_of_biodiversite_causes_no_hrfp == 0 + assert number_of_biodiversite_consequences_no_hrfp == 0 + assert number_of_biodiversite_solutions_no_hrfp == 0 + def test_nothing_get_themes_keywords_duration(): # should not accept theme 'bus' for keyword "abusive" plaintext_regression_incomplete_word = "abusive" - assert get_themes_keywords_duration(plaintext_regression_incomplete_word, subtitles, start) == [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None] + assert get_themes_keywords_duration(plaintext_regression_incomplete_word, subtitles, start) == array_of_none def test_regression_included_get_themes_keywords_duration(): # should not accept theme 'ngt' for keyword "vingt" plaintext_regression_incomplete_word_ngt = "vingt" - assert get_themes_keywords_duration(plaintext_regression_incomplete_word_ngt, subtitles, start) == [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None] + assert get_themes_keywords_duration(plaintext_regression_incomplete_word_ngt, subtitles, start) == array_of_none +def test_filter_high_risk_false_positive_without_hrfp(): + result = filter_high_risk_false_positive(keywords_with_timestamp=keywords) + assert result == keywords + + +def test_filter_high_risk_false_positive_with_hrfp(): + keywords = keywords = [{ + 'category': 'General', + 'keyword': 'réchauffement planétaire', + 'theme': 'changement_climatique_constat', + 'timestamp': 1706437080216, + 'hrfp': True + } + ] + + result = filter_high_risk_false_positive(keywords_with_timestamp=keywords) + assert result == [] def test_three_get_themes_keywords_duration(): - keywords = [ - {'category': 'General', - 'keyword': 'adaptation au dérèglement climatique', - 'theme': 'adaptation_climatique_solutions', - 'timestamp': 1706437080004} - ,{ - 'category': '', - 'keyword': 'record de température', - 'theme': 'changement_climatique_consequences', - 'timestamp': 1706437080004, - }] themes = set([ "adaptation_climatique_solutions", 'changement_climatique_consequences' ]) @@ -280,10 +420,21 @@ def test_three_get_themes_keywords_duration(): number_of_biodiversite_solutions_directes ,number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) + number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_no_hrfp, + number_of_adaptation_climatique_solutions_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_no_hrfp) = get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) assert set(themes_output)== themes - #assert keywords_output == keywords + logging.info(f"Got keywords: {keywords_output}") assert number_of_keywords == 1 assert number_of_changement_climatique_constat == 0 assert number_of_changement_climatique_causes_directes == 0 @@ -296,6 +447,17 @@ def test_three_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + assert number_of_changement_climatique_constat_no_hrfp == 0 + assert number_of_changement_climatique_causes_no_hrfp == 0 + assert number_of_changement_climatique_consequences_no_hrfp == 1 + assert number_of_attenuation_climatique_solutions_no_hrfp == 0 + assert number_of_adaptation_climatique_solutions_no_hrfp == 1 + assert number_of_ressources_no_hrfp == 0 + assert number_of_ressources_solutions_no_hrfp == 0 + assert number_of_biodiversite_concepts_generaux_no_hrfp == 0 + assert number_of_biodiversite_causes_no_hrfp == 0 + assert number_of_biodiversite_consequences_no_hrfp == 0 + assert number_of_biodiversite_solutions_no_hrfp == 0 def test_long_get_themes_keywords_duration(): themes= set([ @@ -338,7 +500,18 @@ def test_long_get_themes_keywords_duration(): number_of_biodiversite_solutions_directes ,number_of_keywords_climat, number_of_keywords_biodiversite, - number_of_keywords_ressources) = get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) + number_of_keywords_ressources + ,number_of_changement_climatique_constat_no_hrfp, + number_of_changement_climatique_causes_no_hrfp, + number_of_changement_climatique_consequences_no_hrfp, + number_of_attenuation_climatique_solutions_no_hrfp, + number_of_adaptation_climatique_solutions_no_hrfp, + number_of_ressources_no_hrfp, + number_of_ressources_solutions_no_hrfp, + number_of_biodiversite_concepts_generaux_no_hrfp, + number_of_biodiversite_causes_no_hrfp, + number_of_biodiversite_consequences_no_hrfp, + number_of_biodiversite_solutions_no_hrfp) = get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) assert set(themes_output) == set(themes) # too hard to maintain #assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -354,14 +527,26 @@ def test_long_get_themes_keywords_duration(): assert number_of_biodiversite_causes_directes == 0 assert number_of_biodiversite_consequences == 0 assert number_of_biodiversite_solutions_directes == 0 + assert number_of_changement_climatique_constat_no_hrfp == 0 + assert number_of_changement_climatique_causes_no_hrfp == 0 + assert number_of_changement_climatique_consequences_no_hrfp == 0 + assert number_of_attenuation_climatique_solutions_no_hrfp == 0 + assert number_of_adaptation_climatique_solutions_no_hrfp == 0 + assert number_of_ressources_no_hrfp == 0 + assert number_of_ressources_solutions_no_hrfp == 0 + assert number_of_biodiversite_concepts_generaux_no_hrfp == 0 + assert number_of_biodiversite_causes_no_hrfp == 0 + assert number_of_biodiversite_consequences_no_hrfp == 0 + assert number_of_biodiversite_solutions_no_hrfp == 0 + def test_stop_word_get_themes_keywords_duration(): plaintext = "haute isolation thermique fabriqué en france pizza" - assert get_themes_keywords_duration(plaintext, subtitles, start) == [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None] + assert get_themes_keywords_duration(plaintext, subtitles, start) == array_of_none def test_train_stop_word_get_themes_keywords_duration(): plaintext = "en train de fabrique en france pizza" - assert get_themes_keywords_duration(plaintext, subtitles, start) == [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None] + assert get_themes_keywords_duration(plaintext, subtitles, start) == array_of_none def test_get_cts_in_ms_for_keywords(): @@ -498,6 +683,17 @@ def test_lower_case_filter_and_tag_by_theme(): ,'number_of_keywords_climat':1, 'number_of_keywords_biodiversite':0, 'number_of_keywords_ressources':0 + ,"number_of_changement_climatique_constat_no_hrfp": 0 + ,"number_of_changement_climatique_causes_no_hrfp": 1 + ,"number_of_changement_climatique_consequences_no_hrfp": 0 + ,"number_of_attenuation_climatique_solutions_no_hrfp": 0 + ,"number_of_adaptation_climatique_solutions_no_hrfp": 0 + ,"number_of_ressources_no_hrfp": 0 + ,"number_of_ressources_solutions_no_hrfp": 0 + ,"number_of_biodiversite_concepts_generaux_no_hrfp": 0 + ,"number_of_biodiversite_causes_no_hrfp": 0 + ,"number_of_biodiversite_consequences_no_hrfp": 0 + ,"number_of_biodiversite_solutions_no_hrfp":0 }]) # List of words to filter on @@ -551,6 +747,17 @@ def test_singular_plural_case_filter_and_tag_by_theme(): ,'number_of_keywords_climat':1, 'number_of_keywords_biodiversite':0, 'number_of_keywords_ressources':0 + ,"number_of_changement_climatique_constat_no_hrfp": 0 + ,"number_of_changement_climatique_causes_no_hrfp": 1 + ,"number_of_changement_climatique_consequences_no_hrfp": 0 + ,"number_of_attenuation_climatique_solutions_no_hrfp": 0 + ,"number_of_adaptation_climatique_solutions_no_hrfp": 0 + ,"number_of_ressources_no_hrfp": 0 + ,"number_of_ressources_solutions_no_hrfp": 0 + ,"number_of_biodiversite_concepts_generaux_no_hrfp": 0 + ,"number_of_biodiversite_causes_no_hrfp": 0 + ,"number_of_biodiversite_consequences_no_hrfp": 0 + ,"number_of_biodiversite_solutions_no_hrfp":0 }]) # List of words to filter on @@ -647,6 +854,17 @@ def test_complexe_filter_and_tag_by_theme(): ,'number_of_keywords_climat':1, 'number_of_keywords_biodiversite':0, 'number_of_keywords_ressources':0 + ,"number_of_changement_climatique_constat_no_hrfp": 0 + ,"number_of_changement_climatique_causes_no_hrfp": 0 + ,"number_of_changement_climatique_consequences_no_hrfp": 0 + ,"number_of_attenuation_climatique_solutions_no_hrfp": 0 + ,"number_of_adaptation_climatique_solutions_no_hrfp": 0 + ,"number_of_ressources_no_hrfp": 0 + ,"number_of_ressources_solutions_no_hrfp": 0 + ,"number_of_biodiversite_concepts_generaux_no_hrfp": 0 + ,"number_of_biodiversite_causes_no_hrfp": 0 + ,"number_of_biodiversite_consequences_no_hrfp": 0 + ,"number_of_biodiversite_solutions_no_hrfp":0 }]) # List of words to filter on @@ -844,6 +1062,40 @@ def test_only_one_count_keywords_duration_overlap(): assert count_keywords_duration_overlap(tag_wanted_duration_second_window_number(keywords_with_timestamp, start, duration_seconds = 15), start) == 1 +def test_only_one_count_keywords_duration_overlap_no_hrfp(): + keywords_with_timestamp = [{ + "keyword" : 'habitabilité de la planète', + "timestamp": original_timestamp, # count for one + "theme":"changement_climatique_constat", + } + ] + + assert count_keywords_duration_overlap(\ + tag_wanted_duration_second_window_number(keywords_with_timestamp, start, duration_seconds = 15),\ + start, count_high_risk_false_positive=False) == 1 + +def test_only_one_hrfp_count_keywords_duration_overlap_no_hrfp(): + keywords_with_timestamp = [{ + "keyword" : 'abeille', + "timestamp": original_timestamp, + "theme":"biodiversite_concepts_generaux_indirectes", # transform to direct via solutions biodiv + }, + { + "keyword" : 'agriculture bio', + "timestamp": original_timestamp + 1000, + "theme":"biodiversite_solutions", + } + ] + + tag = tag_wanted_duration_second_window_number(keywords_with_timestamp, start, duration_seconds = 15) + tag = get_keywords_with_timestamp_with_false_positive(tag, start, duration_seconds=15) + + expected = [{'keyword': 'abeille', 'timestamp': 1706437079004, 'theme': 'biodiversite_concepts_generaux', 'window_number': 0, 'hrfp': True}, + {'keyword': 'agriculture bio', 'timestamp': 1706437080004, 'theme': 'biodiversite_solutions', 'window_number': 0} + ] + assert expected == tag + assert count_keywords_duration_overlap(tag, start, count_high_risk_false_positive=False) == 1 + def test_indirect_count_keywords_duration_overlap(): keywords_with_timestamp = [{ "keyword" : 'digue', @@ -1246,7 +1498,8 @@ def test_tag_wanted_duration_second_window_number(): def test_transform_false_positive_keywords_to_positive(): keywords_with_timestamp = [ - {'keyword': 'recyclage', + { + 'keyword': 'recyclage', 'timestamp': original_timestamp, 'theme': 'attenuation_climatique_solutions_indirectes' # should be transformed to direct }, @@ -1254,11 +1507,13 @@ def test_transform_false_positive_keywords_to_positive(): 'timestamp': original_timestamp + 150, 'theme': 'changement_climatique_constat' }, - {'keyword': 'covoiturage', + { + 'keyword': 'covoiturage', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) + 10000, # should be transformed to direct 'theme': 'attenuation_climatique_solutions_indirectes' }, - {'keyword': 'industrie verte', + { + 'keyword': 'industrie verte', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 2 , 'theme': 'attenuation_climatique_solutions_indirectes' # should be transformed to direct }, @@ -1277,32 +1532,42 @@ def test_transform_false_positive_keywords_to_positive(): ] expected_output = [ - {'keyword': 'recyclage', + { + 'hrfp': True, + 'keyword': 'recyclage', 'timestamp': original_timestamp, 'theme': 'attenuation_climatique_solutions' # was indirect ,'window_number': 0 }, - {'keyword': 'climatique', + { + 'keyword': 'climatique', 'timestamp': original_timestamp + 150, 'theme': 'changement_climatique_constat' # our positive keyword that transform false positive ,'window_number': 0 }, - {'keyword': 'covoiturage', + { + 'hrfp': True, + 'keyword': 'covoiturage', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) + 10000, # should be transformed to direct 'theme': 'attenuation_climatique_solutions' ,'window_number': 1 }, - {'keyword': 'industrie verte', + { + 'hrfp': True, + 'keyword': 'industrie verte', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 2 , 'theme': 'attenuation_climatique_solutions' # should be transformed to direct ,'window_number': 2 }, - {'keyword': 'industrie verte', + { + 'hrfp': True, + 'keyword': 'industrie verte', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 3 , 'theme': 'attenuation_climatique_solutions'# should be transformed to direct ,'window_number': 3 }, - {'keyword': 'industrie verte', + { + 'keyword': 'industrie verte', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 5 , 'theme': 'attenuation_climatique_solutions_indirectes' # should stay to indirect ,'window_number': 5 @@ -1350,22 +1615,26 @@ def test_different_steps_transform_false_positive_keywords_to_positive(): 'window_number': 0, 'theme': 'changement_climatique_constat' }, - {'keyword': 'industrie verte', + {'hrfp': True, + 'keyword': 'industrie verte', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 1 + 150, 'window_number': 1, 'theme': 'attenuation_climatique_solutions' # should be transformed to direct }, - {'keyword': 'agroforesterie', + {'hrfp': True, + 'keyword': 'agroforesterie', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 2 + 150, 'window_number': 2, 'theme': 'attenuation_climatique_solutions' # should be transformed to direct }, - {'keyword': 'alternative durable', + {'hrfp': True, + 'keyword': 'alternative durable', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 3 + 150, 'window_number': 3, 'theme': 'attenuation_climatique_solutions' # should be transformed to direct }, - {'keyword': 'planification écologique', + {'hrfp': True, + 'keyword': 'planification écologique', 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 4 + 150, 'window_number': 4, 'theme': 'attenuation_climatique_solutions' # should be transformed to direct @@ -1385,7 +1654,8 @@ def test_transform_false_positive_keywords_to_positive_different_and_same_subjec 'timestamp': original_timestamp + 150, 'theme': 'changement_climatique_constat' }, - {'keyword': "activisme climatique", + { + 'keyword': "activisme climatique", 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 1 + 151, 'theme': 'attenuation_climatique_solutions_indirectes' # should be transformed to direct }, @@ -1417,7 +1687,9 @@ def test_transform_false_positive_keywords_to_positive_different_and_same_subjec 'window_number': 0, 'theme': 'changement_climatique_constat' }, - {'keyword': "activisme climatique", + { + 'hrfp': True, + 'keyword': "activisme climatique", 'timestamp': original_timestamp + get_keyword_time_separation_ms(15) * 1 + 151, 'window_number': 1, 'theme': 'attenuation_climatique_solutions' # should be transformed to direct diff --git a/test/sitemap/test_main_import_api.py b/test/sitemap/test_main_import_api.py index b5ae8cda..35696eba 100644 --- a/test/sitemap/test_main_import_api.py +++ b/test/sitemap/test_main_import_api.py @@ -35,7 +35,9 @@ def test_main_api_import(): def test_first_row_api_import(): primary_key = "29d2b1f8267b206cb62e475b960de3247e835273f396af012f5ce21bf3056472" + specific_keyword = get_keyword(primary_key) + logging.info(f"Getting {primary_key} :\n {specific_keyword}") assert set(specific_keyword.theme) == set([ 'biodiversite_concepts_generaux_indirectes', 'changement_climatique_consequences_indirectes', diff --git a/test/sitemap/test_update_pg_keywords.py b/test/sitemap/test_update_pg_keywords.py index d82ca288..83830aaf 100644 --- a/test/sitemap/test_update_pg_keywords.py +++ b/test/sitemap/test_update_pg_keywords.py @@ -149,6 +149,17 @@ def test_delete_keywords(): ,0 ,0 ,0 + ,0 + ,0 + ,0 + ,0 + ,0 + ,0 + ,0 + ,0 + ,0 + ,0 + ,0 ) session.commit() assert get_keyword(primary_key) == None @@ -188,6 +199,17 @@ def test_first_update_keywords(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 1 @@ -212,7 +234,18 @@ def test_first_update_keywords(): ,number_of_biodiversite_solutions_directes \ ,number_of_keywords_climat \ ,number_of_keywords_biodiversite \ - ,number_of_keywords_ressources = get_themes_keywords_duration(plaintext, srt, start) + ,number_of_keywords_ressources \ + ,number_of_changement_climatique_constat_no_hrfp \ + ,number_of_changement_climatique_causes_no_hrfp \ + ,number_of_changement_climatique_consequences_no_hrfp \ + ,number_of_attenuation_climatique_solutions_no_hrfp \ + ,number_of_adaptation_climatique_solutions_no_hrfp \ + ,number_of_ressources_no_hrfp \ + ,number_of_ressources_solutions_no_hrfp \ + ,number_of_biodiversite_concepts_generaux_no_hrfp \ + ,number_of_biodiversite_causes_no_hrfp \ + ,number_of_biodiversite_consequences_no_hrfp \ + ,number_of_biodiversite_solutions_no_hrfp = get_themes_keywords_duration(plaintext, srt, start) assert result_after_update.id == result_before_update.id @@ -298,6 +331,17 @@ def test_update_only_one_channel(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }, { "id" : primary_key_tf1, "start": start, @@ -325,6 +369,17 @@ def test_update_only_one_channel(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 2 @@ -353,7 +408,18 @@ def test_update_only_one_channel(): ,number_of_biodiversite_solutions_directes \ ,number_of_keywords_climat \ ,number_of_keywords_biodiversite \ - ,number_of_keywords_ressources = get_themes_keywords_duration(plaintext, srt, start) + ,number_of_keywords_ressources \ + ,number_of_changement_climatique_constat_no_hrfp \ + ,number_of_changement_climatique_causes_no_hrfp \ + ,number_of_changement_climatique_consequences_no_hrfp \ + ,number_of_attenuation_climatique_solutions_no_hrfp \ + ,number_of_adaptation_climatique_solutions_no_hrfp \ + ,number_of_ressources_no_hrfp \ + ,number_of_ressources_solutions_no_hrfp \ + ,number_of_biodiversite_concepts_generaux_no_hrfp \ + ,number_of_biodiversite_causes_no_hrfp \ + ,number_of_biodiversite_consequences_no_hrfp \ + ,number_of_biodiversite_solutions_no_hrfp = get_themes_keywords_duration(plaintext, srt, start) assert result_after_update_tf1.id == result_before_update_tf1.id assert result_after_update_m6.id == result_before_update_m6.id @@ -431,6 +497,17 @@ def test_update_only_program(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 1 @@ -505,6 +582,17 @@ def test_update_only_program_with_only_one_channel(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }, { "id" : primary_key_tf1, "start": start, @@ -532,6 +620,17 @@ def test_update_only_program_with_only_one_channel(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 2 @@ -621,6 +720,17 @@ def test_update_only_empty_program(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }, { "id" : primary_key_tf1, "start": start, @@ -648,6 +758,17 @@ def test_update_only_empty_program(): ,"number_of_keywords_climat": wrong_value ,"number_of_keywords_biodiversite": wrong_value ,"number_of_keywords_ressources": wrong_value + ,"number_of_changement_climatique_constat_no_hrfp": wrong_value, + "number_of_changement_climatique_causes_no_hrfp": wrong_value, + "number_of_changement_climatique_consequences_no_hrfp": wrong_value, + "number_of_attenuation_climatique_solutions_no_hrfp": wrong_value, + "number_of_adaptation_climatique_solutions_no_hrfp": wrong_value, + "number_of_ressources_no_hrfp": wrong_value, + "number_of_ressources_solutions_no_hrfp": wrong_value, + "number_of_biodiversite_concepts_generaux_no_hrfp": wrong_value, + "number_of_biodiversite_causes_no_hrfp": wrong_value, + "number_of_biodiversite_consequences_no_hrfp": wrong_value, + "number_of_biodiversite_solutions_no_hrfp" : wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 2