From 07293d381327ffba06f1b6f85b3a3fee298a5524 Mon Sep 17 00:00:00 2001 From: Peter Dudfield <34686298+peterdudfield@users.noreply.github.com> Date: Fri, 10 Jan 2025 11:12:46 +0000 Subject: [PATCH] New consumer ecmwf india (#731) * use latest nwp consumer ecmwf india * fix * tidy * new logic for making latest.zarr file in nwp * add xarray to install requirements * revert back to old s3 file * clean up * roll back --- terraform/modules/services/airflow/dags/utils/s3.py | 10 +++++++++- terraform/modules/services/airflow/docker-compose.yml | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/terraform/modules/services/airflow/dags/utils/s3.py b/terraform/modules/services/airflow/dags/utils/s3.py index 35cb846b..9b9ed48b 100644 --- a/terraform/modules/services/airflow/dags/utils/s3.py +++ b/terraform/modules/services/airflow/dags/utils/s3.py @@ -24,12 +24,19 @@ def determine_latest_zarr(bucket: str, prefix: str): size_old += obj.size # If the sizes are different, create a new latest.zarr + s3hook.log.info(f"size_old={size_old}, size_new={size_new}") if size_old != size_new and size_new > 500 * 1e3: # Expecting at least 500KB - # Delete the old latest.zarr, if it exists + + # delete latest.zarr + s3hook.log.info(f"Deleting {prefix}/latest.zarr/") if prefix + "/latest.zarr/" in prefixes: s3hook.log.debug(f"Deleting {prefix}/latest.zarr/") keys_to_delete = s3hook.list_keys(bucket_name=bucket, prefix=prefix + "/latest.zarr/") s3hook.delete_objects(bucket=bucket, keys=keys_to_delete) + + # move latest zarr file to latest.zarr using s3 batch jobs + s3hook.log.info(f"Creating {prefix}/latest.zarr/") + # Copy the new latest.zarr s3hook.log.info(f"Copying {zarrs[0]} to {prefix}/latest.zarr/") source_keys = s3hook.list_keys(bucket_name=bucket, prefix=zarrs[0]) @@ -40,6 +47,7 @@ def determine_latest_zarr(bucket: str, prefix: str): dest_bucket_name=bucket, dest_bucket_key=prefix + "/latest.zarr/" + key.split(zarrs[0])[-1], ) + else: s3hook.log.info("No changes to 
latest.zarr required") diff --git a/terraform/modules/services/airflow/docker-compose.yml b/terraform/modules/services/airflow/docker-compose.yml index f86d4964..957970e3 100644 --- a/terraform/modules/services/airflow/docker-compose.yml +++ b/terraform/modules/services/airflow/docker-compose.yml @@ -41,6 +41,7 @@ services: ECS_SECURITY_GROUP: $ECS_SECURITY_GROUP _AIRFLOW_WWW_USER_PASSWORD: ${PASSWORD} AIRFLOW_CONN_SLACK_API_DEFAULT: ${AIRFLOW_CONN_SLACK_API_DEFAULT} + user: "${AIRFLOW_UID:-50000}:0" volumes: - data:/airflow