diff --git a/README.md b/README.md index 9eef64c7..8e1756e0 100644 --- a/README.md +++ b/README.md @@ -287,6 +287,21 @@ Otherwise, default is all channels In case we have a new word detection logic, we must re apply it to all saved keywords inside our database. We should use env variable `UPDATE` like in docker compose (should be set to "true") + +In order to see an actual change in the local DB, first run the test `docker compose up test` and then these commands: +``` +docker exec -ti quotaclimat-postgres_db-1 bash +psql -h localhost --port 5432 -d barometre -U user +--> enter password : password +UPDATE keywords set number_of_keywords=1000 WHERE id = '71b8126a50c1ed2e5cb1eab00e4481c33587db478472c2c0e74325abb872bef6'; +UPDATE keywords set number_of_keywords=1000 WHERE id = '975b41e76d298711cf55113a282e7f11c28157d761233838bb700253d47be262'; +``` + +After updating the `UPDATE` env variable to true inside docker-compose.yml and running `docker compose up mediatree`, you should see these logs: +``` + update_pg_keywords.py:20 | Difference old 1000 - new_number_of_keywords 0 +``` + ### Fix linting Before committing, make sure that the line of codes you wrote are conform to PEP8 standard by running: ```bash diff --git a/docker-compose.yml b/docker-compose.yml index 30c7f125..272fb59e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -136,7 +136,7 @@ services: POSTGRES_PORT: 5432 PORT: 5050 # healthcheck HEALTHCHECK_SERVER: "0.0.0.0" - SENTRY_DSN: prod_only + # SENTRY_DSN: prod_only # START_DATE: 1704576615 # to test batch import # UPDATE: "true" # to batch update PG # CHANNEL : fr3-idf # to reimport only one channel diff --git a/quotaclimat/data_processing/mediatree/update_pg_keywords.py b/quotaclimat/data_processing/mediatree/update_pg_keywords.py index 2817b299..30f80efd 100644 --- a/quotaclimat/data_processing/mediatree/update_pg_keywords.py +++ b/quotaclimat/data_processing/mediatree/update_pg_keywords.py @@ -15,11 +15,13 @@ def update_keywords(session: 
Session, batch_size: int = 50000) -> list: for i in range(0, total_updates, batch_size): batch_updates = saved_keywords[i:i+batch_size] for keyword_id, plaintext, keywords_with_timestamp, number_of_keywords, start in batch_updates: - logging new_number_of_keywords = count_keywords_duration_overlap_without_indirect(keywords_with_timestamp, start) - logging.debug(f"{keyword_id} new value {new_number_of_keywords}") - update_number_of_keywords(session, keyword_id, new_number_of_keywords) - + if(number_of_keywords != new_number_of_keywords): + logging.info(f"Difference old {number_of_keywords} - new_number_of_keywords {new_number_of_keywords}") + logging.debug(f"{keyword_id} new value {new_number_of_keywords}") + update_number_of_keywords(session, keyword_id, new_number_of_keywords) + else: + logging.debug("No difference") logging.info(f"bulk update done {i} out of {total_updates}") session.commit()