diff --git a/ingestion_server/ingestion_server/cleanup.py b/ingestion_server/ingestion_server/cleanup.py index b2faeb0a701..1d40e37e11f 100644 --- a/ingestion_server/ingestion_server/cleanup.py +++ b/ingestion_server/ingestion_server/cleanup.py @@ -303,6 +303,10 @@ def save_cleaned_data(result: dict) -> dict[str, int]: def _get_s3_resource(): + """ + Locally, it connects to a MinIO instance through its endpoint and test credentials. + In live environments, the connection is authorized via IAM roles. + """ if config("ENVIRONMENT", default="local") == "local": return boto3.resource( "s3", @@ -317,15 +321,18 @@ def _get_s3_resource(): def _upload_to_s3(fields): + """ + Upload cleaned data to S3. It assumes that the bucket already exists. + """ bucket_name = config("OPENVERSE_BUCKET", default="openverse-catalog") s3_path = "shared/data-refresh-cleaned-data" try: s3 = _get_s3_resource() + s3.meta.client.head_bucket(Bucket=bucket_name) bucket = s3.Bucket(bucket_name) - bucket.load() log.info(f"Connected to S3 and '{bucket_name}' bucket loaded.") except Exception as e: - log.error(f"Upload failed. Error connecting to S3 or loading bucket: {e}") + log.error(f"Files upload failed. Error connecting to S3.\n{e}") return for field in fields: