diff --git a/quotaclimat/data_processing/mediatree/api_import.py b/quotaclimat/data_processing/mediatree/api_import.py
index a6f5152e..ae3c76c2 100644
--- a/quotaclimat/data_processing/mediatree/api_import.py
+++ b/quotaclimat/data_processing/mediatree/api_import.py
@@ -45,9 +45,12 @@ def refresh_token(token, date):
 # use when word detection is changed
 async def update_pg_data(exit_event):
     logging.info("Updating already saved data from Postgresql")
-    session = get_db_session()
-    update_keywords(session)
-    exit_event.set()
+    try:
+        session = get_db_session()
+        update_keywords(session)
+        exit_event.set()
+    except Exception as err:  # log the failure instead of letting it propagate
+        logging.error("Could not update_pg_data %s:(%s)", type(err).__name__, err)
 
 def get_channels():
     if(os.environ.get("ENV") == "docker" or os.environ.get("CHANNEL") is not None):
diff --git a/quotaclimat/data_processing/mediatree/detect_keywords.py b/quotaclimat/data_processing/mediatree/detect_keywords.py
index b08a5e23..4b0e750f 100644
--- a/quotaclimat/data_processing/mediatree/detect_keywords.py
+++ b/quotaclimat/data_processing/mediatree/detect_keywords.py
@@ -145,7 +145,12 @@ def get_keyword_by_fifteen_second_window(filtered_themes: List[dict], start: dat
         window_number = int( (keyword_info['timestamp'] - start.timestamp() * 1000) // (window_size_seconds) )
         logging.debug(f"Window number {window_number} - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
         if window_number >= number_of_windows and window_number >= 0:
-            logging.error(f"Window number {window_number} is out of range - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
+            if(window_number == number_of_windows): # give some slack to mediatree subtitle edge case
+                logging.warning(f"Edge cases around 2 minutes - still counting for one - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
+                window_number = number_of_windows - 1
+                fifteen_second_window[window_number] = 1
+            else:
+                logging.error(f"Window number {window_number} is out of range - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
         else:
             fifteen_second_window[window_number] = 1
 
diff --git a/test/sitemap/test_detect_keywords.py b/test/sitemap/test_detect_keywords.py
index 830f2398..2afaf98f 100644
--- a/test/sitemap/test_detect_keywords.py
+++ b/test/sitemap/test_detect_keywords.py
@@ -793,17 +793,29 @@ def test_simple_get_keyword_by_fifteen_second_window():
 
     assert get_keyword_by_fifteen_second_window(keywords_with_timestamp, start) == [1, 0, 0, 0, 0, 0, 0, 0]
 
-def test_out_of_bound_get_keyword_by_fifteen_second_window():
+def test_edge_out_of_bound_get_keyword_by_fifteen_second_window():
     keywords_with_timestamp = [
         {
             "keyword" : 'conditions de vie sur terre',
-            "timestamp": original_timestamp + get_keyword_time_separation_ms() * 8,
+            "timestamp": original_timestamp + get_keyword_time_separation_ms() * 8 + 10, # edge case - still counting for one
+            "theme":"changement_climatique_constat",
+        }
+    ]
+
+    assert get_keyword_by_fifteen_second_window(keywords_with_timestamp, start) == [0, 0, 0, 0, 0, 0, 0, 1]
+
+def test_really_out_of_bound_get_keyword_by_fifteen_second_window():
+    keywords_with_timestamp = [
+        {
+            "keyword" : 'conditions de vie sur terre',
+            "timestamp": original_timestamp + get_keyword_time_separation_ms() * 15 + 10, # far beyond the last window - must not be counted
             "theme":"changement_climatique_constat",
         }
     ]
 
     assert get_keyword_by_fifteen_second_window(keywords_with_timestamp, start) == [0, 0, 0, 0, 0, 0, 0, 0]
 
+
 def test_almost_out_of_bound_get_keyword_by_fifteen_second_window():
     keywords_with_timestamp = [
         {