Skip to content

Commit

Permalink
Add GOES Data Download Manager Script (#240)
Browse files Browse the repository at this point in the history
* added new file of download manager for goes

* added new file of download manager for goes

* added new file of download manager for goes

* added check for existing file

* Determine time increment based on product/domain

* made changes in eumetsat.py file

* added new file download_manager.py

* made changes in app.py

* minor changes

* minor fix in eumetsat.py file

* changed name of downloadmanager

* changed name of downloadmanager

* changed name of downloadmanager

* changed name of downloadmanager

* changed name of downloadmanager

* changed name of downloadmanager

* fixed test_utils.py file
  • Loading branch information
14Richa committed Mar 27, 2024
1 parent f69f98e commit 54fe9ec
Show file tree
Hide file tree
Showing 10 changed files with 244 additions and 56 deletions.
4 changes: 2 additions & 2 deletions satip/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import satip
from satip import utils
from satip.eumetsat import DownloadManager
from satip.eumetsat import EUMETSATDownloadManager

log = structlog.stdlib.get_logger()

Expand Down Expand Up @@ -137,7 +137,7 @@ def run(
)
# 1. Get data from API, download native files
with tempfile.TemporaryDirectory() as tmpdir:
download_manager = DownloadManager(
download_manager = EUMETSATDownloadManager(
user_key=api_key,
user_secret=api_secret,
data_dir=tmpdir,
Expand Down
5 changes: 3 additions & 2 deletions satip/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import yaml

from satip import eumetsat
from satip.eumetsat import EUMETSATDownloadManager
from satip.utils import format_dt_str

log = structlog.stdlib.get_logger()
Expand Down Expand Up @@ -110,7 +111,7 @@ def download_eumetsat_data(
end_date = datetime.now()

# Download the data
dm = eumetsat.DownloadManager(user_key, user_secret, download_directory, download_directory)
dm = EUMETSATDownloadManager(user_key, user_secret, download_directory, download_directory)
products_to_use = []
if "rss" in product:
products_to_use.append(RSS_ID)
Expand Down Expand Up @@ -159,7 +160,7 @@ def download_eumetsat_data(


def _download_time_range(
x: Tuple[Tuple[datetime, datetime], str, eumetsat.DownloadManager]
x: Tuple[Tuple[datetime, datetime], str, EUMETSATDownloadManager]
) -> None:
time_range, product_id, download_manager = x
start_time, end_time = time_range
Expand Down
90 changes: 90 additions & 0 deletions satip/download_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Satip Download Manager
This module provides a unified interface for downloading EUMETSAT and GOES
satellite data via the `DownloadManager` class. Users specify the provider
('EUMETSAT' or 'GOES'), and the manager delegates tasks to dedicated
sub-modules for retrieval, storage, and logging.
Key functionalities:
* Download data for a specified time range.
* Handle user authentication (for EUMETSAT data).
* Manage data retrieval, storage, and logging for both providers.
"""

import warnings

import structlog

from satip.eumetsat import EUMETSATDownloadManager
from satip.goes_download_manager import GOESDownloadManager

# Module-level structured logger.
log = structlog.stdlib.get_logger()

# Suppress FutureWarning related to 'H' argument
# NOTE(review): the filter below is not restricted to a message or module, so
# once this module is imported every FutureWarning in the process is ignored,
# not only the 'H' one mentioned above. Confirm that this is intended.
warnings.filterwarnings('ignore', category=FutureWarning)
# Names of the supported data providers.
EUMETSAT_PROVIDER = "EUMETSAT"
GOES_PROVIDER = "GOES"


class DownloadManager:
    """Route download requests to a provider-specific download manager.

    The constructor builds either an ``EUMETSATDownloadManager`` or a
    ``GOESDownloadManager`` depending on ``provider`` and stores it on
    ``self.download_manager``.

    Example usage:
        import datetime

        manager = DownloadManager(
            "GOES",
            data_dir="path to data directory",
            log_directory="path to log directory",
        )
        manager.download_data(
            datetime.datetime(2024, 3, 1, 0, 0),
            datetime.datetime(2024, 3, 1, 6, 0),
        )
    """

    def __init__(self, provider, user_key=None,
                 user_secret=None, data_dir=None,
                 log_directory=None):
        """
        Initialize the DownloadManager.

        Args:
            provider (str): Provider name ('EUMETSAT' or 'GOES').
            user_key (str): User key for accessing data (for EUMETSAT).
            user_secret (str): User secret for accessing data (for EUMETSAT).
            data_dir (str): Directory to save downloaded data.
            log_directory (str): Directory to save logs.

        Raises:
            ValueError: If ``provider`` is not one of the supported providers.
        """
        self.provider = provider

        if provider == EUMETSAT_PROVIDER:
            self.download_manager = EUMETSATDownloadManager(
                user_key, user_secret, data_dir, log_directory
            )
        elif provider == GOES_PROVIDER:
            self.download_manager = GOESDownloadManager(data_dir, log_directory)
        else:
            raise ValueError("Invalid provider. Supported providers are 'EUMETSAT' and 'GOES'.")

    def download_data(self, start_time, end_time):
        """
        Download data for the specified time range.

        Only the GOES provider is wired up here. For any other provider the
        call downloads nothing; a warning is logged so that the no-op is
        visible instead of silent.

        Args:
            start_time (datetime): Start of the download period.
            end_time (datetime): End of the download period.
        """
        if self.provider == GOES_PROVIDER:
            self.download_manager.download_goes_data(start_time, end_time)
            return

        # Previously this fell through without any indication that nothing
        # had been downloaded.
        log.warning(
            f"download_data is not implemented for provider {self.provider!r}; "
            "no data was downloaded. Use self.download_manager directly."
        )
68 changes: 33 additions & 35 deletions satip/eumetsat.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

log = structlog.stdlib.get_logger()


API_ENDPOINT = "https://api.eumetsat.int"

# Data Store searching endpoint
Expand Down Expand Up @@ -195,13 +196,41 @@ def dataset_id_to_link(collection_id, data_id, access_token):
+ access_token
)

def get_filesize_megabytes(filename):
    """Return the on-disk size of ``filename`` in megabytes (bytes / 1e6)."""
    return os.path.getsize(filename) / 1e6


def eumetsat_filename_to_datetime(inner_tar_name):
"""Extracts datetime from EUMETSAT filename.
Takes a file from the EUMETSAT API and returns
the date and time part of the filename.
Args:
inner_tar_name: Filename part which contains the datetime information.
class DownloadManager: # noqa: D205
Usage example:
eumetsat_filename_to_datetime(filename)
"""
The DownloadManager class

provides a handler for downloading data from the EUMETSAT API,
managing: retrieval, logging and metadata
p = re.compile(r"^MSG[1234]-SEVI-MSG15-0[01]00-NA-(\d*)\.")
title_match = p.match(inner_tar_name)
date_str = title_match.group(1)
return datetime.datetime.strptime(date_str, "%Y%m%d%H%M%S")


def eumetsat_cloud_name_to_datetime(filename: str):
    """Extract the datetime encoded in a EUMETSAT cloud mask filename.

    The timestamp is the text following the last "0100-0100-" marker, up to
    the first "." after it, parsed as ``YYYYMMDDHHMMSS``.
    """
    after_marker = filename.split("0100-0100-")[-1]
    timestamp, _, _ = after_marker.partition(".")
    return datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S")



class EUMETSATDownloadManager:
"""
Manager class for downloading EUMETSAT data.
"""

def __init__(
Expand Down Expand Up @@ -648,34 +677,3 @@ def create_and_download_datatailor_data(

except Exception as e:
log.warn(f"Failed deleting customization {jobID}: {e}", exc_info=True)


def get_filesize_megabytes(filename):
    """Return the on-disk size of ``filename`` in megabytes (bytes / 1e6)."""
    return os.path.getsize(filename) / 1e6


def eumetsat_filename_to_datetime(inner_tar_name):
    """Extracts datetime from EUMETSAT filename.

    Takes a file from the EUMETSAT API and returns
    the date and time part of the filename.

    Args:
        inner_tar_name: Filename part which contains the datetime information.
            It must start with ``MSG<1-4>-SEVI-MSG15-0<0|1>00-NA-`` followed by
            a ``YYYYMMDDHHMMSS`` timestamp and a ".".

    Returns:
        The timestamp as a naive ``datetime.datetime``.

    Raises:
        ValueError: If the filename does not follow the expected pattern or
            its timestamp cannot be parsed.

    Usage example:
        eumetsat_filename_to_datetime(filename)
    """
    title_match = re.match(r"^MSG[1234]-SEVI-MSG15-0[01]00-NA-(\d*)\.", inner_tar_name)
    if title_match is None:
        # Without this guard the failure surfaces as an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            f"Cannot extract a datetime from EUMETSAT filename: {inner_tar_name!r}"
        )
    return datetime.datetime.strptime(title_match.group(1), "%Y%m%d%H%M%S")


def eumetsat_cloud_name_to_datetime(filename: str):
    """Extract the datetime encoded in a EUMETSAT cloud mask filename.

    The timestamp is the text following the last "0100-0100-" marker, up to
    the first "." after it, parsed as ``YYYYMMDDHHMMSS``.
    """
    after_marker = filename.split("0100-0100-")[-1]
    timestamp, _, _ = after_marker.partition(".")
    return datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S")
96 changes: 96 additions & 0 deletions satip/goes_download_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Script for downloading GOES data.
"""

import datetime
import logging
import os

from goes2go import GOES


class GOESDownloadManager:
    """
    Manager class for downloading GOES data.

    Datasets are fetched with ``goes2go.GOES`` and written to ``data_dir`` as
    NetCDF files named ``goes_data_<acquisition time>.nc``.
    """

    def __init__(self, data_dir, log_directory=None):
        """
        Initialize the GOESDownloadManager.

        Args:
            data_dir (str): Directory to save downloaded GOES data.
            log_directory (str, optional): Directory to save logs. If given,
                logs go to ``goes_download.log`` inside it; otherwise
                ``logging.basicConfig`` is called without a file, which logs
                to the standard error stream.
        """
        self.data_dir = data_dir
        self.ensure_directory_exists(self.data_dir)

        # NOTE: basicConfig configures the root logger and has no effect if
        # the root logger already has handlers.
        if log_directory:
            self.ensure_directory_exists(log_directory)
            logging.basicConfig(
                filename=os.path.join(log_directory, 'goes_download.log'),
                level=logging.INFO)
        else:
            logging.basicConfig(level=logging.INFO)

        logging.info(f"GOESDownloadManager initialized. Data will be saved to: {data_dir}")

    @staticmethod
    def ensure_directory_exists(directory):
        """Ensures the specified directory exists, creating it if necessary.

        Raises:
            OSError: If the directory cannot be created (logged, then re-raised).
        """
        if os.path.exists(directory):
            return
        try:
            # exist_ok avoids a spurious failure when another process creates
            # the directory between the check above and this call.
            os.makedirs(directory, exist_ok=True)
            logging.info(f"Created directory: {directory}")
        except OSError as e:
            logging.error(f"Error creating directory {directory}: {e}")
            raise

    @staticmethod
    def _to_datetime(value):
        """Normalise an acquisition timestamp to a naive ``datetime.datetime``.

        Accepts a ``datetime``/``date``, an integer count of nanoseconds since
        the Unix epoch, or any object exposing ``.item()`` (e.g. a NumPy
        ``datetime64`` scalar or 0-d array) that yields one of those.

        Raises:
            TypeError: If the value cannot be interpreted as a timestamp.
        """
        raw = value.item() if hasattr(value, "item") else value
        if isinstance(raw, datetime.datetime):
            return raw
        if isinstance(raw, datetime.date):
            return datetime.datetime.combine(raw, datetime.time())
        if isinstance(raw, int) and not isinstance(raw, bool):
            # NumPy returns a plain int from .item() for datetime64 values with
            # nanosecond resolution, because datetime.datetime cannot hold them.
            # Assumes the integer is nanoseconds since the epoch; confirm if
            # other sub-microsecond units can occur.
            return datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=raw // 1000)
        raise TypeError(f"Cannot interpret {raw!r} as an acquisition time")

    def download_goes_data(self, start_time, end_time, product='ABI-L1b-RadC',
                           domain='F', satellite=16):
        """
        Download GOES data for a specified time range and product.

        Steps from ``start_time`` to ``end_time`` (inclusive), fetching the
        dataset nearest to each step and saving it unless a file for that
        acquisition time already exists. A failure at one step is logged and
        the loop continues with the next step.

        Args:
            start_time (datetime): Start of the download period.
            end_time (datetime): End of the download period.
            product (str): GOES product identifier. Default is 'ABI-L1b-RadC'.
            domain (str): Domain for the product. Default is 'F' (Full Disk).
            satellite (int): GOES satellite number. Default is 16.
        """
        G = GOES(satellite=satellite, product=product, domain=domain)

        # Determine time increment based on product/domain
        time_increment = 1  # Default time increment (minutes)
        if product == 'ABI-L1b-RadC' and domain == 'F':
            time_increment = 10
        step = datetime.timedelta(minutes=time_increment)

        current_time = start_time
        while current_time <= end_time:
            try:
                # Download the data
                ds = G.nearesttime(current_time)

                # Get acquisition time from the dataset. Calling .item()
                # directly on a datetime64[ns] value yields an int, which has
                # no strftime, so normalise it first.
                acquisition_time = self._to_datetime(ds.time.data)

                # Format the acquisition time for filename
                date_string = acquisition_time.strftime("%Y-%m-%d_%H-%M-%S")
                filename = f"goes_data_{date_string}.nc"
                filepath = os.path.join(self.data_dir, filename)

                if os.path.exists(filepath):
                    # Data for this acquisition time was saved earlier.
                    logging.info(f"Data for {date_string} already exists. Skipping.")
                else:
                    # Save to NetCDF
                    ds.to_netcdf(filepath)
                    logging.info(f"Downloaded and saved GOES data to: {filename}")
            except Exception as e:
                # Best effort: one failed timestamp must not abort the range.
                logging.error(f"Error downloading GOES data for {current_time}: {e}")

            current_time += step

        logging.info("Completed GOES data download.")
4 changes: 2 additions & 2 deletions scripts/extend_gcp_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import xarray as xr
import satpy
from satpy import Scene
from satip.eumetsat import DownloadManager
from satip.eumetsat import EUMETSATDownloadManager
from satip.scale_to_zero_to_one import ScaleToZeroToOne
from satip.serialize import serialize_attrs
from satip.utils import convert_scene_to_dataarray
Expand All @@ -17,7 +17,7 @@
def download_data(last_zarr_time):
api_key = os.environ["SAT_API_KEY"]
api_secret = os.environ["SAT_API_SECRET"]
download_manager = DownloadManager(user_key=api_key, user_secret=api_secret, data_dir="/mnt/disks/data/native_files/")
download_manager = EUMETSATDownloadManager(user_key=api_key, user_secret=api_secret, data_dir="/mnt/disks/data/native_files/")
start_date = pd.Timestamp.utcnow().tz_convert('UTC').to_pydatetime().replace(tzinfo=None)
last_zarr_time = pd.Timestamp(last_zarr_time).to_pydatetime().replace(tzinfo=None)
start_str = last_zarr_time.strftime("%Y-%m-%d")
Expand Down
4 changes: 2 additions & 2 deletions scripts/generate_test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import matplotlib.pyplot as plt
import xarray as xr

from satip import eumetsat
from satip import EUMETSATDownloadManager
from satip.utils import (
load_cloudmask_to_dataarray,
load_native_to_dataarray,
Expand All @@ -34,7 +34,7 @@ def generate_test_plots():
user_key = os.environ.get("EUMETSAT_USER_KEY")
user_secret = os.environ.get("EUMETSAT_USER_SECRET")

download_manager = eumetsat.DownloadManager(
download_manager = EUMETSATDownloadManager(
user_key=user_key,
user_secret=user_secret,
data_dir=os.getcwd(),
Expand Down
6 changes: 3 additions & 3 deletions scripts/process_monthly_zarrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from satpy import Scene
from tqdm import tqdm

from satip.eumetsat import DownloadManager, eumetsat_filename_to_datetime
from satip.eumetsat import EUMETSATDownloadManager, eumetsat_filename_to_datetime
from satip.jpeg_xl_float_with_nans import JpegXlFloatWithNaNs
from satip.scale_to_zero_to_one import ScaleToZeroToOne
from satip.serialize import serialize_attrs
Expand All @@ -32,7 +32,7 @@ def func(datasets_and_tuples_and_return_data):
datasets = [datasets]
api_key = os.environ["SAT_API_KEY"]
api_secret = os.environ["SAT_API_SECRET"]
download_manager = DownloadManager(
download_manager = EUMETSATDownloadManager(
user_key=api_key, user_secret=api_secret, data_dir=tmpdir
)
download_manager.download_datasets(datasets)
Expand Down Expand Up @@ -279,7 +279,7 @@ def create_dummy_zarr(datasets, base_path):
date_range = pd.date_range(start="2011-01-01 00:00", end="2019-01-01 00:00", freq="1M")
api_key = os.environ["SAT_API_KEY"]
api_secret = os.environ["SAT_API_SECRET"]
download_manager = DownloadManager(user_key=api_key, user_secret=api_secret, data_dir="./")
download_manager = EUMETSATDownloadManager(user_key=api_key, user_secret=api_secret, data_dir="./")
first = True
for date in date_range[::-1]:
start_date = pd.Timestamp(date) - pd.Timedelta("1M")
Expand Down
Loading

0 comments on commit 54fe9ec

Please sign in to comment.