Skip to content

Commit

Permalink
wip: identify claim out of context for a keyword
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus committed Dec 9, 2024
1 parent 6a08b74 commit 20ce377
Show file tree
Hide file tree
Showing 5 changed files with 483 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
secrets/pwd_api.txt
secrets/username_api.txt
secrets/username_api.txt
secrets/*
documents-experts/
llm/
cc-bio.json
*.xlsx
coverage_re

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
52 changes: 51 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,50 @@ services:
logging: # no logs for postgres container
driver: none


llm:
  # Compose service that runs quotaclimat/sentiment/enrich_with_llm.py.
  # It is built from Dockerfile_api_import and only starts once the
  # postgres_db service reports healthy (see depends_on below).
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "5555:5555"
  build:
    context: ./
    dockerfile: Dockerfile_api_import
  entrypoint: ["python", "quotaclimat/sentiment/enrich_with_llm.py"]
  environment:
    # Environment values are written as strings: Compose passes them to the
    # container as text, so quoting avoids implicit YAML number typing.
    ENV: docker  # change me to prod for real cases
    LOGLEVEL: DEBUG  # Change me to info (debug, info, warning, error) to have less log
    PYTHONPATH: /app
    POSTGRES_USER: user
    POSTGRES_DB: barometre
    POSTGRES_PASSWORD: password
    POSTGRES_HOST: postgres_db
    POSTGRES_PORT: "5432"
    PORT_HS: "5555"  # healthcheck (same port as the one published above)
    HEALTHCHECK_SERVER: "0.0.0.0"
    # The four values below are paths to the secret files mounted under
    # /run/secrets, not the secrets themselves.
    # NOTE(review): presumably the application reads the file contents - confirm.
    MEDIATREE_USER: /run/secrets/username_api
    MEDIATREE_PASSWORD: /run/secrets/pwd_api
    SCW_SECRET: /run/secrets/scw_api
    SCW_API_URL: /run/secrets/scw_api_url
    MEDIATREE_AUTH_URL: https://keywords.mediatree.fr/api/auth/token/
    KEYWORDS_URL: https://keywords.mediatree.fr/api/subtitle/  # https://keywords.mediatree.fr/docs/#api-Subtitle-SubtitleList
    MODIN_ENGINE: ray
    MODIN_CPUS: "4"  # "https://modin.readthedocs.io/en/0.11.0/using_modin.html#reducing-or-limiting-the-resources-modin-can-use"
    MODIN_MEMORY: "1000000000"  # 1Gb
    RAY_memory_usage_threshold: "1"
  mem_limit: "1G"
  volumes:
    # Bind mounts of local source directories into /app.
    - ./quotaclimat/:/app/quotaclimat/
    - ./llm/:/app/llm/
    - ./postgres/:/app/postgres/
    - ./test/:/app/test/
  secrets:
    # Each name must match an entry in the top-level `secrets:` section.
    - pwd_api
    - username_api
    - scw_api
    - scw_api_url
  depends_on:
    postgres_db:
      condition: service_healthy

mediatree:
ports:
- 5050:5050
Expand Down Expand Up @@ -156,6 +200,7 @@ services:
CHANNEL : fr3-idf # to reimport only one channel
MEDIATREE_USER : /run/secrets/username_api
MEDIATREE_PASSWORD: /run/secrets/pwd_api
SCW_SECRET: /run/secrets/scw_api
MEDIATREE_AUTH_URL: https://keywords.mediatree.fr/api/auth/token/
KEYWORDS_URL: https://keywords.mediatree.fr/api/subtitle/ # https://keywords.mediatree.fr/docs/#api-Subtitle-SubtitleList
MODIN_ENGINE: ray
Expand All @@ -170,6 +215,7 @@ services:
secrets:
- pwd_api
- username_api
- scw_api
depends_on:
nginxtest:
condition: service_healthy
Expand Down Expand Up @@ -200,4 +246,8 @@ secrets: # https://docs.docker.com/compose/use-secrets/
pwd_api:
file: secrets/pwd_api.txt
username_api:
file: secrets/username_api.txt
file: secrets/username_api.txt
scw_api:
file: secrets/scw_api.txt
scw_api_url:
file: secrets/scw_api_url.txt
Loading

1 comment on commit 20ce377

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
| --- | --- | --- | --- | --- |
| **postgres** | | | | |
| insert_data.py | 43 | 7 | 84% | 36–38, 56–58, 63 |
| insert_existing_data_example.py | 19 | 3 | 84% | 25–27 |
| **postgres/schemas** | | | | |
| models.py | 168 | 11 | 93% | 137–144, 157, 159–160, 225–226, 240–241 |
| **quotaclimat/data_ingestion** | | | | |
| scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228 |
| **quotaclimat/data_ingestion/ingest_db** | | | | |
| ingest_sitemap_in_db.py | 55 | 37 | 33% | 21–42, 45–58, 62–73 |
| **quotaclimat/data_ingestion/scrap_html** | | | | |
| scrap_description_article.py | 36 | 3 | 92% | 19–20, 32 |
| **quotaclimat/data_processing/mediatree** | | | | |
| api_import.py | 213 | 133 | 38% | 44–48, 53–74, 78–81, 87, 90–132, 138–153, 158, 171–183, 187–193, 206–218, 221–225, 231, 269–270, 273–304, 307–309 |
| channel_program.py | 162 | 57 | 65% | 21–23, 34–36, 53–54, 57–59, 98–99, 108, 124, 175–216 |
| config.py | 15 | 2 | 87% | 7, 16 |
| detect_keywords.py | 252 | 16 | 94% | 111–118, 126–127, 271, 341–348, 390 |
| update_pg_keywords.py | 67 | 49 | 27% | 15–130, 154, 157, 164–179, 213–250, 257 |
| utils.py | 79 | 25 | 68% | 29–53, 56, 65, 86–87, 117–120 |
| **quotaclimat/utils** | | | | |
| healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38 |
| logger.py | 24 | 11 | 54% | 22–24, 28–37 |
| sentry.py | 11 | 2 | 82% | 22–23 |
| **TOTAL** | 1334 | 387 | 71% | |

Tests Skipped Failures Errors Time
102 0 💤 0 ❌ 0 🔥 8m 24s ⏱️

Please sign in to comment.