Skip to content

Commit

Permalink
feat: import only one channel based on var env (#105)
Browse files Browse the repository at this point in the history
* feat: import only one channel based on var env

* fix: ci
  • Loading branch information
polomarcus authored Feb 16, 2024
1 parent f9a62af commit 2a7c481
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ Use env variable `START_DATE` like in docker compose (epoch second format : 1705

Otherwise, the default is yesterday at midnight.

### Batch import based on channel
Set the `CHANNEL` environment variable, as in the docker compose file, to import a single channel (string, e.g. `tf1`).

Otherwise, all channels are imported by default.

### Batch update
When the word detection logic changes, we must reapply it to all keywords saved in our database.

Expand Down
4 changes: 3 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ services:
entrypoint: ["poetry", "run", "pytest", "--cov-report", "term:skip-covered", "--cov=quotaclimat", "--cov=postgres", "test/"]
environment:
ENV: docker
# CHANNEL: "fr3-idf"
LOGLEVEL: DEBUG
PYTHONPATH: /app
POSTGRES_USER: user
Expand Down Expand Up @@ -133,8 +134,9 @@ services:
POSTGRES_PORT: 5432
PORT: 5050 # healthcheck
HEALTHCHECK_SERVER: "0.0.0.0"
# START_DATE: 1704576615 # to test batch import
# START_DATE: 1704576615 # to test batch import
# UPDATE: "true" # to batch update PG
# CHANNEL : fr3-idf # to reimport only one channel
MEDIATREE_USER : /run/secrets/username_api
MEDIATREE_PASSWORD: /run/secrets/pwd_api
MEDIATREE_AUTH_URL: https://keywords.mediatree.fr/api/auth/token/
Expand Down
22 changes: 15 additions & 7 deletions quotaclimat/data_processing/mediatree/api_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,28 @@ async def update_pg_data(exit_event):
update_keywords(session)
exit_event.set()

def get_channels():
    """Return the list of channels to import.

    A single channel is returned when the ``CHANNEL`` environment variable
    holds a non-empty value, or when ``ENV`` is ``docker`` (in which case the
    channel defaults to ``france2`` if ``CHANNEL`` is not provided).
    Otherwise every known channel is returned.

    An empty ``CHANNEL`` value is treated as unset, so a blank variable
    outside docker falls back to all channels instead of silently restricting
    the import to ``france2``.

    Returns:
        list[str]: the channel identifiers to query.
    """
    selected_channel = os.environ.get("CHANNEL")

    # Truthiness check on purpose: "" must behave like "not set".
    if os.environ.get("ENV") == "docker" or selected_channel:
        default_channel = selected_channel or "france2"
        logging.warning(f"Only one channel of env var CHANNEL {default_channel} (default to france2) is used")
        return [default_channel]

    # prod - all channels
    logging.warning("All channels are used")
    return [
        "tf1", "france2", "fr3-idf", "m6", "arte", "d8", "tmc", "bfmtv", "lci", "franceinfotv", "itele",
        "europe1", "france-culture", "france-inter", "nrj", "rmc", "rtl", "rtl2",
    ]

async def get_and_save_api_data(exit_event):
conn = connect_to_db()
token=get_auth_token(password=password, user_name=USER)
type_sub = 's2t'

(start_date_to_query, end_epoch) = get_start_end_date_env_variable_with_default()

if(os.environ.get("ENV") == "docker"):
logging.warning("Docker cases - only some channels are used")
channels = ["france2"]
else: #prod
channels = ["tf1", "france2", "m6", "arte", "d8", "tmc", "bfmtv", "lci", "franceinfotv", "itele",
"europe1", "france-culture", "france-inter", "nrj", "rmc", "rtl", "rtl2"]

channels = get_channels()

range = get_date_range(start_date_to_query, end_epoch)
logging.info(f"Number of date to query : {len(range)}")
for date in range:
Expand Down
6 changes: 6 additions & 0 deletions test/sitemap/test_mediatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ def test_transform_theme_query_includes():

assert output == expected

def test_get_channels():
    """Check the channels returned by get_channels() for the current ENV.

    Under ENV=docker the expected result is the single default channel;
    in any other environment the full channel list is expected.
    """
    every_channel = [
        "tf1", "france2", "fr3-idf", "m6", "arte", "d8", "tmc", "bfmtv", "lci", "franceinfotv", "itele",
        "europe1", "france-culture", "france-inter", "nrj", "rmc", "rtl", "rtl2",
    ]
    running_in_docker = os.environ.get("ENV") == "docker"
    # default for docker compose config is the single channel "france2"
    expected = ["france2"] if running_in_docker else every_channel

    assert get_channels() == expected

def test_get_themes_keywords_duration():
subtitles = [{
Expand Down

1 comment on commit 2a7c481

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
postgres
   insert_data.py46785%38–40, 59–61, 66
   insert_existing_data_example.py20385%25–27
postgres/schemas
   models.py711579%74–81, 91–92, 101–111
quotaclimat/data_analytics
   analytics_signataire_charte.py29290%1–67
   bilan.py1081080%2–372
   data_coverage.py34340%1–94
   exploration.py1251250%1–440
   sitemap_analytics.py1181180%1–343
quotaclimat/data_ingestion
   categorization_program_type.py110%1
   config_youtube.py110%1
   scaleway_db_backups.py34340%1–74
   scrap_chartejournalismeecologie_signataires.py50500%1–169
   scrap_sitemap.py1341787%27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228
   scrap_tv_program.py62620%1–149
   scrap_youtube.py1141140%1–238
quotaclimat/data_ingestion/ingest_db
   ingest_sitemap_in_db.py544026%18–39, 42–61, 65–76
quotaclimat/data_ingestion/scrap_html
   scrap_description_article.py36392%19–20, 32
quotaclimat/data_processing/mediatree
   api_import.py17910641%32–36, 41–44, 48–51, 57, 60–85, 91–106, 111–113, 138–145, 149–152, 156–162, 173–184, 187–191, 197, 222–223, 229, 231, 234–260, 264–275
   config.py15287%7, 16
   detect_keywords.py88693%101–108
   utils.py642167%27–51, 54, 73–74
quotaclimat/data_processing/sitemap
   sitemap_processing.py412734%15–19, 23–25, 29–47, 51–58, 66–96, 101–103
quotaclimat/utils
   channels.py660%1–95
   climate_keywords.py220%3–35
   healthcheck_config.py291452%22–24, 27–38
   logger.py14379%22–24
   plotly_theme.py17170%1–56
TOTAL153696537% 

Tests Skipped Failures Errors Time
39 0 💤 0 ❌ 0 🔥 10.885s ⏱️

Please sign in to comment.