Skip to content

Commit

Permalink
fix: slacking for really close subtitle near the 2 minute limit
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus committed Feb 26, 2024
1 parent 51adc52 commit 611ee9d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 6 deletions.
9 changes: 6 additions & 3 deletions quotaclimat/data_processing/mediatree/api_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ def refresh_token(token, date):
# use when word detection is changed
async def update_pg_data(exit_event):
logging.info("Updating already saved data from Postgresql")
session = get_db_session()
update_keywords(session)
exit_event.set()
try:
session = get_db_session()
update_keywords(session)
exit_event.set()
except Exception as err:
logging.error("Could update_pg_data %s:(%s) %s" % (type(err).__name__, err))

def get_channels():
if(os.environ.get("ENV") == "docker" or os.environ.get("CHANNEL") is not None):
Expand Down
7 changes: 6 additions & 1 deletion quotaclimat/data_processing/mediatree/detect_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,12 @@ def get_keyword_by_fifteen_second_window(filtered_themes: List[dict], start: dat
window_number = int( (keyword_info['timestamp'] - start.timestamp() * 1000) // (window_size_seconds) )
logging.debug(f"Window number {window_number} - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
if window_number >= number_of_windows and window_number >= 0:
logging.error(f"Window number {window_number} is out of range - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
if(window_number == number_of_windows): # give some slack to mediatree subtitle edge case
logging.warning(f"Edge cases around 2 minutes - still counting for one - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
window_number = number_of_windows - 1
fifteen_second_window[window_number] = 1
else:
logging.error(f"Window number {window_number} is out of range - kwtimestamp {keyword_info['timestamp']} - start {start.timestamp() * 1000}")
else:
fifteen_second_window[window_number] = 1

Expand Down
16 changes: 14 additions & 2 deletions test/sitemap/test_detect_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,17 +793,29 @@ def test_simple_get_keyword_by_fifteen_second_window():

assert get_keyword_by_fifteen_second_window(keywords_with_timestamp, start) == [1, 0, 0, 0, 0, 0, 0, 0]

def test_out_of_bound_get_keyword_by_fifteen_second_window():
def test_edge_out_of_bound_get_keyword_by_fifteen_second_window():
keywords_with_timestamp = [
{
"keyword" : 'conditions de vie sur terre',
"timestamp": original_timestamp + get_keyword_time_separation_ms() * 8,
"timestamp": original_timestamp + get_keyword_time_separation_ms() * 8 + 10, # edge case - still counting for one
"theme":"changement_climatique_constat",
}
]

assert get_keyword_by_fifteen_second_window(keywords_with_timestamp, start) == [0, 0, 0, 0, 0, 0, 0, 1]

def test_really_out_of_bound_get_keyword_by_fifteen_second_window():
# A keyword whose timestamp lies far beyond the last window (15 separations past
# original_timestamp, not merely on the boundary) must not be counted in any window.
keywords_with_timestamp = [
{
"keyword" : 'conditions de vie sur terre',
"timestamp": original_timestamp + get_keyword_time_separation_ms() * 15 + 10, # well past the last window - must NOT be counted
"theme":"changement_climatique_constat",
}
]

# Expect every window to stay at 0: no slack is given this far out of range.
assert get_keyword_by_fifteen_second_window(keywords_with_timestamp, start) == [0, 0, 0, 0, 0, 0, 0, 0]


def test_almost_out_of_bound_get_keyword_by_fifteen_second_window():
keywords_with_timestamp = [
{
Expand Down

1 comment on commit 611ee9d

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File | Stmts | Miss | Cover | Missing
postgres
   insert_data.py | 46 | 7 | 85% | 38–40, 59–61, 66
   insert_existing_data_example.py | 20 | 3 | 85% | 25–27
postgres/schemas
   models.py | 71 | 15 | 79% | 74–81, 91–92, 101–111
quotaclimat/data_analytics
   analytics_signataire_charte.py | 29 | 29 | 0% | 1–67
   bilan.py | 108 | 108 | 0% | 2–372
   data_coverage.py | 34 | 34 | 0% | 1–94
   exploration.py | 125 | 125 | 0% | 1–440
   sitemap_analytics.py | 118 | 118 | 0% | 1–343
quotaclimat/data_ingestion
   categorization_program_type.py | 1 | 1 | 0% | 1
   config_youtube.py | 1 | 1 | 0% | 1
   scaleway_db_backups.py | 34 | 34 | 0% | 1–74
   scrap_chartejournalismeecologie_signataires.py | 50 | 50 | 0% | 1–169
   scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228
   scrap_tv_program.py | 62 | 62 | 0% | 1–149
   scrap_youtube.py | 114 | 114 | 0% | 1–238
quotaclimat/data_ingestion/ingest_db
   ingest_sitemap_in_db.py | 59 | 41 | 31% | 21–42, 45–65, 69–80
quotaclimat/data_ingestion/scrap_html
   scrap_description_article.py | 36 | 3 | 92% | 19–20, 32
quotaclimat/data_processing/mediatree
   api_import.py | 180 | 105 | 42% | 38–42, 47–53, 57–60, 66, 69–96, 102–117, 122–124, 149–156, 160–163, 167–173, 184–195, 198–202, 208, 234–235, 239, 243–262, 266–277
   config.py | 15 | 2 | 87% | 7, 16
   detect_keywords.py | 114 | 3 | 97% | 131–133
   utils.py | 66 | 22 | 67% | 19, 30–54, 57, 76–77
quotaclimat/data_processing/sitemap
   sitemap_processing.py | 41 | 27 | 34% | 15–19, 23–25, 29–47, 51–58, 66–96, 101–103
quotaclimat/utils
   channels.py | 6 | 6 | 0% | 1–95
   climate_keywords.py | 2 | 2 | 0% | 3–35
   healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38
   logger.py | 14 | 3 | 79% | 22–24
   plotly_theme.py | 17 | 17 | 0% | 1–56
   sentry.py | 10 | 2 | 80% | 21–22
TOTAL | 1588 | 965 | 39% |

Tests Skipped Failures Errors Time
54 0 💤 0 ❌ 0 🔥 48.986s ⏱️

Please sign in to comment.