Skip to content

Commit

Permalink
chores: update keywords list + channel france24/sudradio (#129)
Browse files Browse the repository at this point in the history
* chores

* chores: sud-radio

* test: keyword updated

* wip

* dependencies updates

* poetry update 1.8.1
  • Loading branch information
polomarcus authored Mar 1, 2024
1 parent 48e9742 commit 6c80490
Show file tree
Hide file tree
Showing 13 changed files with 448 additions and 401 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ on:

env:
PYTHON_VERSION: '3.11'
POETRY_VERSION: '1.7.1'
POETRY_VERSION: '1.8.1'

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:

env:
PYTHON_VERSION: '3.11'
POETRY_VERSION: '1.7.1'
POETRY_VERSION: '1.8.1'

jobs:
# Label of the runner job
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ WORKDIR /app

COPY pyproject.toml poetry.lock ./

RUN pip install poetry==1.7.1
RUN pip install poetry==1.8.1

RUN poetry install

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_api_import
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ WORKDIR /app

COPY pyproject.toml poetry.lock ./

RUN pip install poetry==1.7.1
RUN pip install poetry==1.8.1

RUN poetry install

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_ingest
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ WORKDIR /app

COPY pyproject.toml poetry.lock ./

RUN pip install poetry==1.7.1
RUN pip install poetry==1.8.1

RUN poetry install

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_streamlit
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ WORKDIR /app

COPY pyproject.toml poetry.lock ./

RUN pip install poetry==1.7.1
RUN pip install poetry==1.8.1

RUN poetry install

Expand Down
585 changes: 287 additions & 298 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions quotaclimat/data_processing/mediatree/api_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_channels():
else: #prod - all channels
logging.warning("All channels are used")
return ["tf1", "france2", "fr3-idf", "france5", "m6", "arte", "d8", "tmc", "bfmtv", "lci", "franceinfotv", "itele",
"europe1", "france-culture", "france-inter", "nrj", "rmc", "rtl", "rtl2"]
"europe1", "france-culture", "france-inter", "sud-radio", "rmc", "rtl", "france24"]

return channels

Expand Down Expand Up @@ -99,7 +99,7 @@ async def get_and_save_api_data(exit_event):
# @see https://github.com/jd/tenacity/tree/main
@retry(wait=wait_random_exponential(multiplier=1, max=60),stop=stop_after_attempt(7))
def get_auth_token(password=password, user_name=USER):
logger.info(f"Getting a token for user {user_name}")
logging.info(f"Getting a token for user {user_name}")
try:
post_arguments = {
'grant_type': 'password'
Expand Down Expand Up @@ -201,7 +201,7 @@ def extract_api_sub(

def parse_raw_json(response):
if response.status_code == 504:
logger.error(f"Mediatree API server error 504 (retry enabled)\n {response.content}")
logging.error(f"Mediatree API server error 504 (retry enabled)\n {response.content}")
raise Exception
else:
return json.loads(response.content.decode('utf_8'))
Expand All @@ -215,7 +215,7 @@ def parse_number_pages(response_sub) -> int :
def parse_reponse_subtitle(response_sub, channel = None) -> Optional[pd.DataFrame]:
with sentry_sdk.start_transaction(op="task", name="parse_reponse_subtitle"):
total_results = parse_total_results(response_sub)
logging.getLogger("modin.logger.default").setLevel(logging.WARNING)
logging.getLogger("modin.logging.default").setLevel(logging.WARNING)
if(total_results > 0):
logging.info(f"{total_results} 'total_results' field")

Expand Down Expand Up @@ -244,7 +244,7 @@ def log_dataframe_size(df, channel):

async def main():
with monitor(monitor_slug='mediatree'): #https://docs.sentry.io/platforms/python/crons/
logger.info("Start api mediatree import")
logging.info("Start api mediatree import")
create_tables()

event_finish = asyncio.Event()
Expand Down
2 changes: 2 additions & 0 deletions quotaclimat/data_processing/mediatree/detect_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def format_word_regex(word: str) -> str:
return word + "s?"
elif word.endswith('s'):
return word + '?'
elif word.endswith('x'):
return word + '?'
else:
return word

Expand Down
Loading

1 comment on commit 6c80490

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| postgres | | | | |
|    insert_data.py | 44 | 7 | 84% | 37–39, 58–60, 65 |
|    insert_existing_data_example.py | 20 | 3 | 85% | 25–27 |
| postgres/schemas | | | | |
|    models.py | 72 | 15 | 79% | 74–81, 91–92, 101–111 |
| quotaclimat/data_analytics | | | | |
|    analytics_signataire_charte.py | 29 | 29 | 0% | 1–67 |
|    bilan.py | 108 | 108 | 0% | 2–372 |
|    data_coverage.py | 34 | 34 | 0% | 1–94 |
|    exploration.py | 125 | 125 | 0% | 1–440 |
|    sitemap_analytics.py | 118 | 118 | 0% | 1–343 |
| quotaclimat/data_ingestion | | | | |
|    categorization_program_type.py | 1 | 1 | 0% | 1 |
|    config_youtube.py | 1 | 1 | 0% | 1 |
|    scaleway_db_backups.py | 34 | 34 | 0% | 1–74 |
|    scrap_chartejournalismeecologie_signataires.py | 50 | 50 | 0% | 1–169 |
|    scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228 |
|    scrap_tv_program.py | 62 | 62 | 0% | 1–149 |
|    scrap_youtube.py | 114 | 114 | 0% | 1–238 |
| quotaclimat/data_ingestion/ingest_db | | | | |
|    ingest_sitemap_in_db.py | 59 | 41 | 31% | 21–42, 45–65, 69–80 |
| quotaclimat/data_ingestion/scrap_html | | | | |
|    scrap_description_article.py | 36 | 3 | 92% | 19–20, 32 |
| quotaclimat/data_processing/mediatree | | | | |
|    api_import.py | 177 | 103 | 42% | 38–42, 47–53, 57–60, 66, 69–96, 102–117, 122–124, 149–161, 165–168, 172–178, 189–200, 203–207, 213, 237–238, 242, 246–265, 268–270 |
|    config.py | 15 | 2 | 87% | 7, 16 |
|    detect_keywords.py | 145 | 6 | 96% | 77–79, 167–169 |
|    utils.py | 66 | 22 | 67% | 19, 30–54, 57, 76–77 |
| quotaclimat/data_processing/sitemap | | | | |
|    sitemap_processing.py | 41 | 27 | 34% | 15–19, 23–25, 29–47, 51–58, 66–96, 101–103 |
| quotaclimat/utils | | | | |
|    channels.py | 6 | 6 | 0% | 1–95 |
|    climate_keywords.py | 2 | 2 | 0% | 3–35 |
|    healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38 |
|    logger.py | 24 | 11 | 54% | 22–24, 28–37 |
|    plotly_theme.py | 17 | 17 | 0% | 1–56 |
|    sentry.py | 10 | 2 | 80% | 21–22 |
| TOTAL | 1625 | 974 | 40% | |

Tests Skipped Failures Errors Time
63 0 💤 0 ❌ 0 🔥 51.821s ⏱️

Please sign in to comment.