Skip to content

Commit

Permalink
refacto: remove deepcopy (#240)
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus authored Sep 13, 2024
1 parent c14e47a commit f8ea865
Showing 1 changed file with 16 additions and 10 deletions.
26 changes: 16 additions & 10 deletions quotaclimat/data_processing/mediatree/detect_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,25 +201,31 @@ def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], s
return [None] * number_of_elements_in_array

def get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds: int = 15):
    """Return a processed copy of ``keywords_with_timestamp``.

    Each item is copied one level deep, then the copied list is passed, in
    order, through ``tag_wanted_duration_second_window_number``,
    ``transform_false_positive_keywords_to_positive`` and
    ``filter_keyword_with_same_timestamp``.

    Args:
        keywords_with_timestamp: Iterable of items exposing ``.copy()``
            (presumably keyword dicts -- confirm against callers).
        start: Forwarded unchanged to the tagging and false-positive helpers.
        duration_seconds: Forwarded to
            ``tag_wanted_duration_second_window_number``.

    Returns:
        Whatever ``filter_keyword_with_same_timestamp`` returns for the
        processed copies.
    """
    logging.debug("Using duration_seconds %s", duration_seconds)

    # Per-item shallow copy instead of copy.deepcopy (wip: for memory leak).
    # NOTE(review): nested mutable values stay shared with the caller's
    # items -- confirm that the helpers below only touch top-level keys.
    keywords_with_timestamp_copy = [item.copy() for item in keywords_with_timestamp]

    keywords_with_timestamp_copy = tag_wanted_duration_second_window_number(
        keywords_with_timestamp_copy, start, duration_seconds=duration_seconds
    )
    keywords_with_timestamp_copy = transform_false_positive_keywords_to_positive(
        keywords_with_timestamp_copy, start
    )
    keywords_with_timestamp_copy = filter_keyword_with_same_timestamp(keywords_with_timestamp_copy)

    return keywords_with_timestamp_copy

def get_themes(keywords_with_timestamp: List[dict]) -> List[str]:
    """Return the distinct ``'theme'`` values found in the given keywords.

    Args:
        keywords_with_timestamp: Dicts that each contain a ``'theme'`` key.

    Returns:
        A list with one entry per distinct theme. The order comes from set
        iteration and must not be relied upon.

    Raises:
        KeyError: If an item has no ``'theme'`` key.
    """
    # Set comprehension: deduplicates without building a throwaway list first.
    return list({kw['theme'] for kw in keywords_with_timestamp})

def clean_metadata(keywords_with_timestamp):
    """Return copies of the items with the ``'window_number'`` key removed.

    The input items are never mutated: each one is shallow-copied before the
    key is dropped.

    Args:
        keywords_with_timestamp: List of dict-like items; may be empty or
            ``None``.

    Returns:
        A new list of copied items without ``'window_number'``. A falsy
        input (``None`` or an empty list) is returned unchanged.
    """
    if not keywords_with_timestamp:
        return keywords_with_timestamp

    # Shallow copy instead of deep copy (wip: for memory leak); only the
    # top-level 'window_number' key is removed, so one level is sufficient.
    cleaned = []
    for item in keywords_with_timestamp:
        item_copy = item.copy()
        item_copy.pop('window_number', None)
        cleaned.append(item_copy)

    return cleaned

def log_min_max_date(df):
max_date = max(df['start'])
Expand Down

1 comment on commit f8ea865

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| **postgres** | | | | |
|    insert_data.py | 43 | 7 | 84% | 36–38, 56–58, 63 |
|    insert_existing_data_example.py | 19 | 3 | 84% | 25–27 |
| **postgres/schemas** | | | | |
|    models.py | 150 | 10 | 93% | 124–131, 143–144, 202–203, 217–218 |
| **quotaclimat/data_ingestion** | | | | |
|    scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228 |
| **quotaclimat/data_ingestion/ingest_db** | | | | |
|    ingest_sitemap_in_db.py | 55 | 37 | 33% | 21–42, 45–58, 62–73 |
| **quotaclimat/data_ingestion/scrap_html** | | | | |
|    scrap_description_article.py | 36 | 3 | 92% | 19–20, 32 |
| **quotaclimat/data_processing/mediatree** | | | | |
|    api_import.py | 212 | 133 | 37% | 44–48, 53–69, 73–76, 82, 85–127, 133–148, 152–153, 166–178, 182–188, 201–213, 216–220, 226, 262–263, 266–302, 305–307 |
|    channel_program.py | 157 | 56 | 64% | 28–30, 41–43, 60–61, 64–66, 93, 105, 114, 154–195 |
|    config.py | 15 | 2 | 87% | 7, 16 |
|    detect_keywords.py | 209 | 8 | 96% | 220, 278–285 |
|    update_pg_keywords.py | 54 | 39 | 28% | 14–100, 125–129, 152–178, 184 |
|    utils.py | 69 | 22 | 68% | 27–51, 54, 63, 84–85 |
| **quotaclimat/utils** | | | | |
|    healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38 |
|    logger.py | 24 | 11 | 54% | 22–24, 28–37 |
|    sentry.py | 11 | 2 | 82% | 22–23 |
| **TOTAL** | 1243 | 364 | 71% |   |

Tests Skipped Failures Errors Time
87 0 💤 0 ❌ 0 🔥 1m 35s ⏱️

Please sign in to comment.