diff --git a/terraform/india/development/main.tf b/terraform/india/development/main.tf index df5ad598..aa393ec9 100644 --- a/terraform/india/development/main.tf +++ b/terraform/india/development/main.tf @@ -6,15 +6,17 @@ # 1.3 - ECS Cluster # 2.0 - S3 bucket for NWP data # 2.1 - S3 bucket for Satellite data +# 2.2 - S3 bucket for Forecast data # 3.0 - Secret containing environment variables for the NWP consumer # 3.1 - Secret containing environment variables for the Satellite consumer # 3.2 - Secret containing HF read access -# 4.0 - ECS task definition for the NWP consumer +# 4.0 - ECS task definition for the ECMWF consumer # 4.1 - ECS task definition for the GFS consumer -# 4.2 - ECS task definition for Collection RUVNL data -# 4.3 - Satellite Consumer -# 4.4 - ECS task definition for the Forecast - Client RU -# 4.5 - ECS task definition for the Forecast - Client AD +# 4.2 - ECS task definition for the MetOffice consumer +# 4.3 - ECS task definition for Collection RUVNL data +# 4.4 - Satellite Consumer +# 4.5 - ECS task definition for the Forecast - Client RU +# 4.6 - ECS task definition for the Forecast - Client AD # 5.0 - Airflow EB Instance # 5.1 - India API EB Instance # 5.2 - India Analysis Dashboard @@ -86,6 +88,16 @@ module "s3-satellite-bucket" { lifecycled_prefixes = ["data"] } +# 2.2 +module "s3-forecast-bucket" { + source = "../../modules/storage/s3-private" + environment = local.environment + region = var.region + domain = local.domain + service_name = "forecast" + lifecycled_prefixes = [""] +} + # 3.0 resource "aws_secretsmanager_secret" "nwp_consumer_secret" { name = "${local.environment}/data/nwp-consumer" @@ -198,9 +210,49 @@ module "nwp_consumer_gfs_live_ecs_task" { ] } +# 4.2 +module "nwp-consumer-metoffice-live-ecs-task" { + source = "../../modules/services/ecs_task" + ecs-task_name = "nwp-consumer-metoffice-india" + ecs-task_type = "consumer" + ecs-task_execution_role_arn = module.ecs-cluster.ecs_task_execution_role_arn + ecs-task_size = { + cpu = 512 + memory = 1024 + } -# 4.2 + aws-region = var.region + aws-environment = local.environment + + s3-buckets = [ + { + id : module.s3-nwp-bucket.bucket_id + access_policy_arn : module.s3-nwp-bucket.write_policy_arn + } + ] + + container-env_vars = [ + { "name" : "LOGLEVEL", "value" : "INFO" }, + { "name" : "METOFFICE_ORDER_ID", "value" : "india-11params-54steps" }, + { "name" : "MODEL_REPOSITORY", "value" : "metoffice-datahub" }, + { "name" : "CONCURRENCY", "value" : "false" }, + { "name" : "ZARRDIR", "value" : format("s3://%s/metoffice/data", module.s3-nwp-bucket.bucket_id) }, + { "name" : "SENTRY_DSN", "value" : var.sentry_dsn }, + ] + container-secret_vars = [ + { + secret_policy_arn: aws_secretsmanager_secret.nwp_consumer_secret.arn, + values: ["METOFFICE_API_KEY"], + } + ] + container-tag = "devsjc-major-refactor" + container-name = "openclimatefix/nwp-consumer" + container-command = ["consume"] +} + + +# 4.3 module "ruvnl_consumer_ecs" { source = "../../modules/services/ecs_task" @@ -234,7 +286,7 @@ module "ruvnl_consumer_ecs" { ] } -# 4.3 - Satellite Consumer +# 4.4 - Satellite Consumer module "satellite_consumer_ecs" { source = "../../modules/services/ecs_task" @@ -277,47 +329,58 @@ module "satellite_consumer_ecs" { -# 4.4 - Forecast - Client RU +# 4.5 - Forecast - Client RUVNL module "forecast" { - source = "../../modules/services/forecast_generic" - - region = var.region - environment = local.environment - app-name = "forecast" - ecs_config = { - docker_image = "openclimatefix/india_forecast_app" - docker_version = var.version-forecast - memory_mb = 3072 - cpu = 1024 - } - rds_config = { - database_secret_arn = module.postgres-rds.secret.arn - database_secret_read_policy_arn = module.postgres-rds.secret-policy.arn - } - s3_nwp_bucket = { - bucket_id = module.s3-nwp-bucket.bucket_id - bucket_read_policy_arn = module.s3-nwp-bucket.read_policy_arn - datadir = "ecmwf/data" - } - s3_satellite_bucket = { - bucket_id = module.s3-satellite-bucket.bucket_id - bucket_read_policy_arn = module.s3-satellite-bucket.read_policy_arn - datadir = "data" - } + source = "../../modules/services/ecs_task" - // this isnt really needed - s3_ml_bucket = { - bucket_id = module.s3-nwp-bucket.bucket_id - bucket_read_policy_arn = module.s3-nwp-bucket.read_policy_arn - } + aws-region = var.region + aws-environment = local.environment + + s3-buckets = [ + { + id : module.s3-nwp-bucket.bucket_id, + access_policy_arn : module.s3-nwp-bucket.read_policy_arn + }, + { + id : module.s3-forecast-bucket.bucket_id, + access_policy_arn : module.s3-forecast-bucket.write_policy_arn + } + ] - loglevel = "INFO" + ecs-task_name = "forecast" + ecs-task_type = "forecast" ecs-task_execution_role_arn = module.ecs-cluster.ecs_task_execution_role_arn - sentry_dsn= var.sentry_dsn + ecs-task_size = { + memory = 3072 + cpu = 1024 + } + + container-env_vars = [ + { "name" : "AWS_REGION", "value" : var.region }, + { "name" : "ENVIRONMENT", "value" : local.environment }, + { "name" : "LOGLEVEL", "value" : "INFO" }, + { "name" : "NWP_ECMWF_ZARR_PATH", "value": "s3://${module.s3-nwp-bucket.bucket_id}/ecmwf/data/latest.zarr" }, + { "name" : "NWP_GFS_ZARR_PATH", "value": "s3://${module.s3-nwp-bucket.bucket_id}/gfs/data/latest.zarr" }, + { "name" : "NWP_MO_GLOBAL_ZARR_PATH", "value": "s3://${module.s3-nwp-bucket.bucket_id}/metoffice/data/latest.zarr" }, + { "name" : "SENTRY_DSN", "value": var.sentry_dsn}, + { "name" : "USE_SATELLITE", "value": "False"}, + { "name" : "SAVE_BATCHES_DIR", "value": "s3://${module.s3-forecast-bucket.bucket_id}/RUVNL"} + ] + + container-secret_vars = [ + {secret_policy_arn: module.postgres-rds.secret.arn, + values: ["DB_URL"] + } + ] + + container-tag = var.version-forecast + container-name = "india_forecast_app" + container-registry = "openclimatefix" + container-command = [] } -# 4.5 - Forecast - Client AD +# 4.6 - Forecast - Client AD module "forecast-ad" { source = "../../modules/services/ecs_task" @@ -327,11 +390,15 @@ module "forecast-ad" { s3-buckets = [ { id : module.s3-satellite-bucket.bucket_id, - access_policy_arn : module.s3-satellite-bucket.write_policy_arn + access_policy_arn : module.s3-satellite-bucket.read_policy_arn }, { id : module.s3-nwp-bucket.bucket_id, - access_policy_arn : module.s3-nwp-bucket.write_policy_arn + access_policy_arn : module.s3-nwp-bucket.read_policy_arn + }, + { + id : module.s3-forecast-bucket.bucket_id, + access_policy_arn : module.s3-forecast-bucket.write_policy_arn } ] @@ -349,10 +416,12 @@ module "forecast-ad" { { "name" : "LOGLEVEL", "value" : "DEBUG" }, { "name" : "NWP_ECMWF_ZARR_PATH", "value": "s3://${module.s3-nwp-bucket.bucket_id}/ecmwf/data/latest.zarr" }, { "name" : "NWP_GFS_ZARR_PATH", "value": "s3://${module.s3-nwp-bucket.bucket_id}/gfs/data/latest.zarr" }, + { "name" : "NWP_MO_GLOBAL_ZARR_PATH", "value": "s3://${module.s3-nwp-bucket.bucket_id}/metoffice/data/latest.zarr" }, { "name" : "SATELLITE_ZARR_PATH", "value": "s3://${module.s3-satellite-bucket.bucket_id}/data/latest/iodc_latest.zarr.zip" }, { "name" : "SENTRY_DSN", "value": var.sentry_dsn}, { "name" : "USE_SATELLITE", "value": "True"}, - { "name" : "CLIENT_NAME", "value": "ad"} + { "name" : "CLIENT_NAME", "value": "ad"}, + { "name" : "SAVE_BATCHES_DIR", "value": "s3://${module.s3-forecast-bucket.bucket_id}/ad"}, ] container-secret_vars = [ @@ -432,6 +501,8 @@ module "analysis_dashboard" { { "name" : "ORIGINS", "value" : "*" }, { "name" : "REGION", "value": local.domain}, { "name" : "ENVIRONMENT", "value": local.environment}, + { "name" : "AUTH0_DOMAIN", "value" : var.auth_domain }, + { "name" : "AUTH0_CLIENT_ID", "value" : var.auth_dashboard_client_id }, ] container-name = "analysis-dashboard" container-tag = var.analysis_dashboard_version diff --git a/terraform/india/development/variables.tf b/terraform/india/development/variables.tf index 2c9156b9..00dc3bf5 100644 --- a/terraform/india/development/variables.tf +++ b/terraform/india/development/variables.tf @@ -71,4 +71,9 @@ variable "auth_domain" { variable "auth_api_audience" { description = "The Auth API Audience that should be used" default = "not-set" +} + +variable "auth_dashboard_client_id" { + description = "The Auth client id for the dashboard that should be used" + default = "not-set" } \ No newline at end of file diff --git a/terraform/india/production/main.tf b/terraform/india/production/main.tf index ea76be12..0e7bf853 100644 --- a/terraform/india/production/main.tf +++ b/terraform/india/production/main.tf @@ -11,10 +11,11 @@ # 3.2 - Secret containing HF read access # 4.0 - ECS task definition for the NWP consumer # 4.1 - ECS task definition for the GFS consumer -# 4.2 - ECS task definition for Collection RUVNL data -# 4.3 - Satellite Consumer -# 4.4 - ECS task definition for the Forecast - Client RU -# 4.5 - ECS task definition for the Forecast - Client AD +# 4.2 - ECS task definition for the MetOffice consumer +# 4.3 - ECS task definition for Collection RUVNL data +# 4.4 - Satellite Consumer +# 4.5 - ECS task definition for the Forecast - Client RU +# 4.6 - ECS task definition for the Forecast - Client AD # 5.0 - Airflow EB Instance # 5.1 - India API EB Instance # 5.2 - India Analysis Dashboard @@ -201,8 +202,49 @@ module "nwp_consumer_gfs_live_ecs_task" { ] } - # 4.2 +module "nwp-consumer-metoffice-live-ecs-task" { + source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/ecs_task?ref=f0ecf51" + + ecs-task_name = "nwp-consumer-metoffice-india" + ecs-task_type = "consumer" + ecs-task_execution_role_arn = module.ecs-cluster.ecs_task_execution_role_arn + ecs-task_size = { + cpu = 512 + memory = 1024 + } + + aws-region = var.region + aws-environment = local.environment + + s3-buckets = [ + { + id : module.s3-nwp-bucket.bucket_id + access_policy_arn : module.s3-nwp-bucket.write_policy_arn + } + ] + + container-env_vars = [ + { "name" : "LOGLEVEL", "value" : "INFO" }, + { "name" : "METOFFICE_ORDER_ID", "value" : "india-11params-54steps" }, + { "name" : "MODEL_REPOSITORY", "value" : "metoffice-datahub" }, + { "name" : "CONCURRENCY", "value" : "false" }, + { "name" : "ZARRDIR", "value" : format("s3://%s/metoffice/data", module.s3-nwp-bucket.bucket_id) }, + { "name" : "SENTRY_DSN", "value" : var.sentry_dsn }, + ] + container-secret_vars = [ + { + secret_policy_arn: aws_secretsmanager_secret.nwp_consumer_secret.arn, + values: ["METOFFICE_API_KEY"], + } + ] + container-tag = "devsjc-major-refactor" + container-name = "openclimatefix/nwp-consumer" + container-command = ["consume"] +} + + +# 4.3 module "ruvnl_consumer_ecs" { source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/ecs_task?ref=205465e" @@ -237,7 +279,7 @@ module "ruvnl_consumer_ecs" { } -# 4.3 - Satellite Consumer +# 4.4 - Satellite Consumer module "satellite_consumer_ecs" { source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/ecs_task?ref=205465e" @@ -278,9 +320,9 @@ module "satellite_consumer_ecs" { } -# 4.4 - Forecast - Client RU +# 4.5 - Forecast - Client RU module "forecast" { - source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/forecast_generic?ref=42eba24" + source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/forecast_generic?ref=f0ecf51" region = var.region environment = local.environment @@ -288,7 +330,7 @@ module "forecast" { ecs_config = { docker_image = "openclimatefix/india_forecast_app" docker_version = var.version-forecast - memory_mb = 2048 + memory_mb = 4096 cpu = 1024 } rds_config = { @@ -316,7 +358,7 @@ module "forecast" { sentry_dsn = var.sentry_dsn } -# 4.5 - Forecast - Client AD +# 4.6 - Forecast - Client AD module "forecast-ad" { source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/ecs_task?ref=73255a4" @@ -372,7 +414,7 @@ module "forecast-ad" { # 5.0 module "airflow" { - source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/airflow?ref=c73cee9" + source = "github.com/openclimatefix/ocf-infrastructure//terraform/modules/services/airflow?ref=f0ecf51" aws-environment = local.environment aws-region = local.region aws-domain = local.domain @@ -426,6 +468,8 @@ module "analysis_dashboard" { { "name" : "ORIGINS", "value" : "*" }, { "name" : "REGION", "value": local.domain}, { "name" : "ENVIRONMENT", "value": local.environment}, + { "name" : "AUTH0_DOMAIN", "value" : var.auth_domain }, + { "name" : "AUTH0_CLIENT_ID", "value" : var.auth_dashboard_client_id }, ] container-name = "analysis-dashboard" container-tag = var.analysis_dashboard_version diff --git a/terraform/india/production/variables.tf b/terraform/india/production/variables.tf index 2eee7eb2..e85bf900 100644 --- a/terraform/india/production/variables.tf +++ b/terraform/india/production/variables.tf @@ -73,3 +73,8 @@ variable satellite-consumer { default = "0.0.1" description = "Container image tag of the satellite data consumer to use: openclimatefix/satip" } + +variable "auth_dashboard_client_id" { + description = "The Auth client id for the dashboard that should be used" + default = "not-set" +} diff --git a/terraform/modules/services/airflow/dags/india/nwp-dag.py b/terraform/modules/services/airflow/dags/india/nwp-dag.py index b12d1a69..716ea86f 100644 --- a/terraform/modules/services/airflow/dags/india/nwp-dag.py +++ b/terraform/modules/services/airflow/dags/india/nwp-dag.py @@ -5,6 +5,7 @@ from airflow.operators.latest_only import LatestOnlyOperator from utils.slack import on_failure_callback +from utils.s3 import determine_latest_zarr default_args = { 'owner': 'airflow', @@ -24,7 +25,13 @@ region = 'india' -with DAG(f'{region}-nwp-consumer', schedule_interval="0 * * * *", default_args=default_args, concurrency=10, max_active_tasks=10) as dag: +with DAG( + f'{region}-nwp-consumer', + schedule_interval="0 * * * *", + default_args=default_args, + concurrency=10, + max_active_tasks=10, +) as dag: dag.doc_md = "Get NWP data" latest_only = LatestOnlyOperator(task_id="latest_only") @@ -45,7 +52,6 @@ task_concurrency=10, ) - nwp_consumer_gfs = EcsRunTaskOperator( task_id=f'{region}-nwp-consumer-gfs-india', task_definition='nwp-consumer-gfs-india', @@ -62,23 +68,26 @@ task_concurrency=10, ) - # nwp_consumer_meteomatics = EcsRunTaskOperator( - # task_id=f'{region}-nwp-consumer-meteomatics-india', - # task_definition='nwp-consumer-meteomatics-india', - # cluster=cluster, - # overrides={}, - # launch_type="FARGATE", - # network_configuration={ - # "awsvpcConfiguration": { - # "subnets": [subnet], - # "securityGroups": [security_group], - # "assignPublicIp": "ENABLED", - # }, - # }, - # task_concurrency=10, - # ) + nwp_consumer_metoffice = EcsRunTaskOperator( + task_id=f'{region}-nwp-consumer-metoffice-india', + task_definition='nwp-consumer-metoffice-india', + cluster=cluster, + overrides={}, + launch_type="FARGATE", + network_configuration={ + "awsvpcConfiguration": { + "subnets": [subnet], + "securityGroups": [security_group], + "assignPublicIp": "ENABLED", + }, + }, + task_concurrency=10, + ) + rename_zarr_metoffice = determine_latest_zarr.override( + task_id="determine_latest_zarr_metoffice", + )(bucket=f"india-nwp-{env}", prefix="metoffice/data") latest_only >> nwp_consumer_ecmwf latest_only >> nwp_consumer_gfs - # latest_only >> nwp_consumer_meteomatics + latest_only >> nwp_consumer_metoffice >> rename_zarr_metoffice diff --git a/terraform/modules/services/airflow/dags/uk/nwp-dag.py b/terraform/modules/services/airflow/dags/uk/nwp-dag.py index f5d3bd37..2ed449fc 100644 --- a/terraform/modules/services/airflow/dags/uk/nwp-dag.py +++ b/terraform/modules/services/airflow/dags/uk/nwp-dag.py @@ -70,7 +70,7 @@ task_concurrency=10, ) - file = f's3://nowcasting-nwp-{env}/data-national/latest.zarr.zip' + file = f's3://nowcasting-nwp-{env}/data-metoffice/latest.zarr.zip' command = f'curl -X GET "{url}/v0/solar/GB/update_last_data?component=nwp&file={file}"' nwp_update_ukv = BashOperator( task_id="nwp-update-ukv", diff --git a/terraform/modules/services/airflow/dags/utils/s3.py b/terraform/modules/services/airflow/dags/utils/s3.py new file mode 100644 index 00000000..35cb846b --- /dev/null +++ b/terraform/modules/services/airflow/dags/utils/s3.py @@ -0,0 +1,45 @@ +from airflow.providers.amazon.aws.hooks.s3 import S3Hook +from airflow.decorators import task + +@task(task_id="determine_latest_zarr") +def determine_latest_zarr(bucket: str, prefix: str): + s3hook = S3Hook(aws_conn_id=None) # Use Boto3 default connection strategy + # Get a list of all the non-latest zarrs in the bucket prefix + prefixes = s3hook.list_prefixes(bucket_name=bucket, prefix=prefix + "/", delimiter='/') + zarrs = sorted([ + p for p in prefixes if p.endswith('.zarr/') and "latest" not in p + ], reverse=True) + # Get the size of the most recent zarr and the latest.zarr zarr + s3bucket = s3hook.get_bucket(bucket_name=bucket) + size_old, size_new = (0, 0) + if len(zarrs) == 0: + s3hook.log.info("No non-latest zarrs found in bucket, exiting") + return + + for obj in s3bucket.objects.filter(Prefix=zarrs[0]): + size_new += obj.size + + if prefix + "/latest.zarr/" in prefixes: + for obj in s3bucket.objects.filter(Prefix=prefix + "/latest.zarr/"): + size_old += obj.size + + # If the sizes are different, create a new latest.zarr + if size_old != size_new and size_new > 500 * 1e3: # Expecting at least 500KB + # Delete the old latest.zarr, if it exists + if prefix + "/latest.zarr/" in prefixes: + s3hook.log.debug(f"Deleting {prefix}/latest.zarr/") + keys_to_delete = s3hook.list_keys(bucket_name=bucket, prefix=prefix + "/latest.zarr/") + s3hook.delete_objects(bucket=bucket, keys=keys_to_delete) + # Copy the new latest.zarr + s3hook.log.info(f"Copying {zarrs[0]} to {prefix}/latest.zarr/") + source_keys = s3hook.list_keys(bucket_name=bucket, prefix=zarrs[0]) + for key in source_keys: + s3hook.copy_object( + source_bucket_name=bucket, + source_bucket_key=key, + dest_bucket_name=bucket, + dest_bucket_key=prefix + "/latest.zarr/" + key.split(zarrs[0])[-1], + ) + else: + s3hook.log.info("No changes to latest.zarr required") + diff --git a/terraform/modules/services/airflow/eb.tf b/terraform/modules/services/airflow/eb.tf index 4fdb626c..6b775ec8 100644 --- a/terraform/modules/services/airflow/eb.tf +++ b/terraform/modules/services/airflow/eb.tf @@ -141,6 +141,13 @@ resource "aws_elastic_beanstalk_environment" "eb-api-env" { resource = "" } + setting { + namespace = "aws:elasticbeanstalk:application:environment" + name = "AWS_DEFAULT_REGION" + value = var.aws-region + resource = "" + } + setting { namespace = "aws:ec2:vpc" name = "VPCId" diff --git a/terraform/modules/services/forecast_generic/ecs.tf b/terraform/modules/services/forecast_generic/ecs.tf index 0a5ed259..76f52bd7 100644 --- a/terraform/modules/services/forecast_generic/ecs.tf +++ b/terraform/modules/services/forecast_generic/ecs.tf @@ -32,6 +32,7 @@ resource "aws_ecs_task_definition" "ecs-task-definition" { {"name": "NWP_UKV_ZARR_PATH", "value":"s3://${var.s3_nwp_bucket.bucket_id}/${var.s3_nwp_bucket.datadir}/latest.zarr"}, {"name": "NWP_ECMWF_ZARR_PATH", "value":"s3://${var.s3_nwp_bucket.bucket_id}/ecmwf/data/latest.zarr"}, {"name": "NWP_GFS_ZARR_PATH", "value":"s3://${var.s3_nwp_bucket.bucket_id}/gfs/data/latest.zarr"}, + {"name": "NWP_MO_GLOBAL_ZARR_PATH", "value": "s3://${var.s3_nwp_bucket.bucket_id}/metoffice/data/latest.zarr"}, {"name": "SATELLITE_ZARR_PATH", "value":"s3://${var.s3_satellite_bucket.bucket_id}/${var.s3_satellite_bucket.datadir}/latest.zarr.zip"}, {"name": "ML_MODEL_PATH", "value": "s3://${var.s3_ml_bucket.bucket_id}/"}, {"name": "ML_MODEL_BUCKET", "value": var.s3_ml_bucket.bucket_id},