diff --git a/.talismanrc b/.talismanrc
index d491e668b..9eaf512c6 100644
--- a/.talismanrc
+++ b/.talismanrc
@@ -3,6 +3,8 @@ fileignoreconfig:
   checksum: f7719ba0d36160d97e80ee15cb5415b601354576929e36df0596c7d192465cfb
 - filename: README.md
   checksum: df312ccb4c75fc4c2441a1f7f2c7817ee98ffb3065c78d5d7d6addf6ab129176
+- filename: analytics/.env.example
+  checksum: 8b09617118ef02245d361673d661fdee62b71a683a58d344dd09354f9c144c37
 - filename: analytics/dagster/src/assets/populate_housings_ban_addresses.py
   checksum: 66b41821bccc209598ed3d082e5666102edf52ae854b41db3f0b3fe3640657b7
 - filename: analytics/dagster/src/assets/populate_owners_ban_addresses.py
@@ -10,7 +12,7 @@ fileignoreconfig:
 - filename: analytics/dagster/src/resources/ban_config.py
   checksum: 034c6924978983da0ca5897bb06b64598a5a813dc93d1d9e8f8a62da952d4d22
 - filename: analytics/dagster/src/resources/database_resources.py
-  checksum: 12fb6c30e1a0378c39cd1da759ec1ece28bda86ea6353c3ea0076c2d94da682e
+  checksum: 37520a27778a7a89bedb5b4cf45dbc8361f02d78e02fa10e29ed8fc4ae312db5
 - filename: frontend/.env.example
   checksum: 7e2a5ff197c49ff9f715b3d189da7282bdb40de53ea49735e9f183ece19168fc
 - filename: frontend/src/components/Draft/DraftSender.tsx
diff --git a/analytics/.env.example b/analytics/.env.example
new file mode 100644
index 000000000..5067dc699
--- /dev/null
+++ b/analytics/.env.example
@@ -0,0 +1,17 @@
+DAGSTER_PG_PASSWORD=postgres
+DAGSTER_PG_USERNAME=postgres
+DAGSTER_PG_DB=analytics
+DAGSTER_PG_HOST=postgres
+DAGSTER_PG_PORT=5432
+
+POSTGRES_PRODUCTION_DB_NAME=database
+POSTGRES_PRODUCTION_USER=postgres
+POSTGRES_PRODUCTION_PASSWORD=postgres
+POSTGRES_PRODUCTION_DB=localhost
+POSTGRES_PRODUCTION_PORT=5432
+
+BAN_API_URL=https://api-adresse.data.gouv.fr/search/csv/
+CSV_FILE_PATH=temp_csv
+CHUNK_SIZE=10000
+MAX_FILES=5
+DISABLE_MAX_FILES=False
diff --git a/analytics/dagster/src/config.py b/analytics/dagster/src/config.py
index 6fd6f0fe9..c2f263246 100644
--- a/analytics/dagster/src/config.py
+++ b/analytics/dagster/src/config.py
@@ -33,6 +33,23 @@ class Config:
 
     DAGSTER_RETRY_DELAY = 10 * 60 # 10 minutes
     DAGSTER_RETRY_MAX_ATTEMPS = 3
+    # BAN API settings. Fallbacks match analytics/.env.example so these
+    # values are never None when the environment variables are unset.
+    BAN_API_URL = os.environ.get("BAN_API_URL", "https://api-adresse.data.gouv.fr/search/csv/")
+    CSV_FILE_PATH = os.environ.get("CSV_FILE_PATH", "temp_csv")
+    try:
+        CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "10000"))
+    except ValueError as err:
+        raise ValueError("The environment variable CHUNK_SIZE must be an integer.") from err
+
+    try:
+        MAX_FILES = int(os.environ.get("MAX_FILES", "5"))
+    except ValueError as err:
+        raise ValueError("The environment variable MAX_FILES must be an integer.") from err
+
+    # Off unless explicitly enabled, matching analytics/.env.example.
+    # The comparison is case-insensitive ("true" and "True" both enable it).
+    DISABLE_MAX_FILES = os.environ.get("DISABLE_MAX_FILES", "False").lower() == "true"
 
     public_tables = [
         "marts_public_establishments_morphology",
diff --git a/analytics/dagster/src/resources/ban_config.py b/analytics/dagster/src/resources/ban_config.py
index 07f35ac40..2955849f6 100644
--- a/analytics/dagster/src/resources/ban_config.py
+++ b/analytics/dagster/src/resources/ban_config.py
@@ -1,14 +1,16 @@
 from pydantic_settings import BaseSettings
 from pydantic import Field, field_validator
-from dagster import resource
+import dagster
+from dagster import resource, String, Int, Bool
+from ..config import Config
 
 class BANConfig(BaseSettings):
-    api_url: str = Field("https://api-adresse.data.gouv.fr/search/csv/", env="BAN_API_URL")
-    csv_file_path: str = Field("temp_csv", env="CSV_FILE_PATH")
+    api_url: str = Field(Config.BAN_API_URL)
+    csv_file_path: str = Field(Config.CSV_FILE_PATH)
 
-    chunk_size: int = Field(10000, env="CHUNK_SIZE")
-    max_files: int = Field(5, env="MAX_FILES")
-    disable_max_files: bool = Field(False, env="DISABLE_MAX_FILES")
+    chunk_size: int = Field(Config.CHUNK_SIZE)
+    max_files: int = Field(Config.MAX_FILES)
+    disable_max_files: bool = Field(Config.DISABLE_MAX_FILES)
 
     @field_validator("chunk_size")
     def chunk_size_positive(cls, v):
@@ -22,11 +24,11 @@ class Config:
 
 @resource(
     config_schema={
-        "api_url": str,
-        "csv_file_path": str,
-        "chunk_size": int,
-        "max_files": int,
-        "disable_max_files": bool,
+        "api_url": dagster.Field(String, default_value=Config.BAN_API_URL),
+        "csv_file_path": dagster.Field(String, default_value=Config.CSV_FILE_PATH),
+        "chunk_size": dagster.Field(Int, default_value=Config.CHUNK_SIZE),
+        "max_files": dagster.Field(Int, default_value=Config.MAX_FILES),
+        "disable_max_files": dagster.Field(Bool, default_value=Config.DISABLE_MAX_FILES),
     }
 )
 def ban_config_resource(init_context):
diff --git a/analytics/dagster/src/resources/database_resources.py b/analytics/dagster/src/resources/database_resources.py
index dcbe9e37e..b78f88f33 100644
--- a/analytics/dagster/src/resources/database_resources.py
+++ b/analytics/dagster/src/resources/database_resources.py
@@ -1,13 +1,15 @@
-from dagster import resource
+from dagster import resource, Field, String
 from sqlalchemy import create_engine
 import psycopg2
 
+from ..config import Config
+
 @resource(config_schema={
-    "db_name": str,
-    "db_user": str,
-    "db_password": str,
-    "db_host": str,
-    "db_port": int,
+    "db_name": Field(String, default_value=Config.POSTGRES_PRODUCTION_DB_NAME),
+    "db_user": Field(String, default_value=Config.POSTGRES_PRODUCTION_USER),
+    "db_password": Field(String, default_value=Config.POSTGRES_PRODUCTION_PASSWORD),
+    "db_host": Field(String, default_value=Config.POSTGRES_PRODUCTION_DB),
+    "db_port": Field(String, default_value=Config.POSTGRES_PRODUCTION_PORT),
 })
 def psycopg2_connection_resource(init_context):
     config = init_context.resource_config
@@ -24,11 +26,11 @@ def psycopg2_connection_resource(init_context):
         conn.close()
 
 @resource(config_schema={
-    "db_name": str,
-    "db_user": str,
-    "db_password": str,
-    "db_host": str,
-    "db_port": int,
+    "db_name": Field(String, default_value=Config.POSTGRES_PRODUCTION_DB_NAME),
+    "db_user": Field(String, default_value=Config.POSTGRES_PRODUCTION_USER),
+    "db_password": Field(String, default_value=Config.POSTGRES_PRODUCTION_PASSWORD),
+    "db_host": Field(String, default_value=Config.POSTGRES_PRODUCTION_DB),
+    "db_port": Field(String, default_value=Config.POSTGRES_PRODUCTION_PORT),
 })
 def sqlalchemy_engine_resource(init_context):
     config = init_context.resource_config