From 7da4a778dfc5d73c61a0e2aa0e587402d5998736 Mon Sep 17 00:00:00 2001 From: Antoine Tavant Date: Fri, 16 Aug 2024 14:53:05 +0200 Subject: [PATCH] Add S3 download function --- .gitignore | 1 + doc/getting_started/data_sources.rst | 90 +++++++++++++++++++++++++++ doc/getting_started/index.rst | 8 +++ src/energy_forecast/meteo.py | 93 ++++++++++++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 doc/getting_started/data_sources.rst diff --git a/.gitignore b/.gitignore index e0bbbd7..130d1fe 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,4 @@ data/bronze/*.xls doc/_build .vscode/settings.json data/silver/*.nc +data/silver/weather_forecasts/*.nc diff --git a/doc/getting_started/data_sources.rst b/doc/getting_started/data_sources.rst new file mode 100644 index 0000000..fed9a75 --- /dev/null +++ b/doc/getting_started/data_sources.rst @@ -0,0 +1,90 @@ +Data Sources +============ + +This page lists the different data sources available, and the corresponding way to access them. + +Energy Data Sources +------------------- + +Predicted consumption computed by RTE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The total consumption of France is computed by RTE. + +The corresponding API is named `Consumption `_ +You can use it to access + +- "short_term" : the expected consumption for today, tomorrow (D-1) and the day after tomorrow (D-2) +- "weekly_forecasts" : the expected consumption for D-3 to D-9 + +When requesting dates in the past, the history of the forecasts is available. + +To access it, use the class ``energy_forecast.consumption_forecast.PredictionForecastAPI`` + +Predicted Productions computed by RTE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Solar and Eolian productions in France are computed by RTE. 
+ +The corresponding API is named `Generation Forecast `_ +You can use it to access + +- "SOLAR" : the expected solar production for today (D-0), and tomorrow (D-1) +- "WIND_ONSHORE" : the expected eolian production for today (D-0), and tomorrow (D-1) + +Other production means are available, some with a longer forecast horizon (D-2 to D-3) + +When requesting dates in the past, the history of the forecasts is available. +However, the API is limited to 21 days of history per call, so a lot of calls are needed to get a long history. + + +When requesting dates in the past, the history of the forecasts is available. + +To access it, use the class ``energy_forecast.production_forecast.ProductionForecastAPI`` + +Tempo labels computed by RTE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Tempo labels are provided by RTE. +The corresponding API is named `Tempo Like Supply Contract `_ + +You can use it to access the tempo labels for the current day, and the next day, and all the history of the tempo labels. + +To access it, use the class ``energy_forecast.tempo_rte.TempoSignalAPI`` + +About Eco2mix +------------- + +Eco2mix is a service provided by RTE, which provides a lot of data about the energy production and consumption in France. +Some are the same as the ones provided by the APIs above, but some are not. + +In particular, the historical regional production and consumption are available. + +You can access it using the function ``energy_forecast.eco2mix.get_data`` (Refactoring in progress) + +Weather Data Sources +-------------------- + +Weather forecasts +~~~~~~~~~~~~~~~~~ + +The weather forecasts are provided by Meteo France with different levels of detail, corresponding to different model names. + +The raw files can be downloaded from `meteo.data.gouv.fr `_ + +You can access the Arpege model forecasts using the class ``energy_forecast.meteo.ArpegeSimpleAPI`` +This client class downloads the data as Grib2 files, which require Xarray and cfgrib to be read. 
# Closed sets of values accepted by ``download_historical_forecasts``.
_HISTORICAL_VARIABLES = (
    "wind_speed_hourly",
    "sun_flux_downward_hourly",
    "temperature_hourly",
)
_HISTORICAL_FORECAST_TYPES = ("d0", "d1", "d2", "d3")


def _validated_selection(value, allowed, label):
    """Expand ``"all"`` / a single string / an iterable into a checked list.

    Parameters
    ----------
    value : str or iterable of str
        ``"all"``, one allowed value, or several allowed values.
    allowed : tuple of str
        The accepted values.
    label : str
        Human readable name of the option, used in the error message.

    Returns
    -------
    list[str]
        The selected values, in the order they were given.

    Raises
    ------
    ValueError
        If one of the selected values is not in ``allowed``.
    """
    if isinstance(value, str):
        selection = list(allowed) if value == "all" else [value]
    else:
        # Materialise once so that one-shot iterables (generators) survive
        # both the validation below and the later download loop.
        selection = list(value)
    for item in selection:
        if item not in allowed:
            raise ValueError(
                f"Unknown {label} {item} : must be in {list(allowed)}"
            )
    return selection


def download_historical_forecasts(s3_key,
                                  s3_secret,
                                  s3_entrypoint,
                                  s3_bucket,
                                  prefix="./",
                                  variables="all",
                                  forecast_type="all",
                                  dryrun=False
                                  ):
    """Download the historical forecasts from the S3 bucket.

    Each requested file is stored under
    ``<prefix>/weather_forecasts/<variable>_<forecast_type>.nc``.
    Files that already exist locally are skipped.

    Parameters
    ----------
    s3_key : str
        the key to access the S3 bucket.
    s3_secret : str
        the secret to access the S3 bucket.
    s3_entrypoint : str
        the entrypoint of the S3 bucket.
    s3_bucket : str
        the name of the S3 bucket.
    prefix : str or Path
        The prefix where the files are downloaded.
        Should be similar to ``"./data/silver"``.
    variables : str or list[str], optional
        the variables to download.
        Can be ``"wind_speed_hourly"``, ``"sun_flux_downward_hourly"``,
        or ``"temperature_hourly"`` or a list of these values.
        Default is ``"all"``, which downloads all the variables.
    forecast_type : str or list[str], optional
        the forecast type to download.
        Can be ``"d0"``, ``"d1"``, ``"d2"``, or ``"d3"``,
        or a list of these values.
        Default is ``"all"``, which downloads all the forecast types.
    dryrun : bool, optional
        if True, do not download the files and do not create any
        directory; only check that each remote key can be reached.
        Default is False.

    Returns
    -------
    list[Path]
        the local paths of the files downloaded (or, in a dry run, of the
        files that would be downloaded). Files skipped because they
        already exist are not listed.

    Raises
    ------
    ValueError
        If ``variables`` or ``forecast_type`` contains an unknown value.
    """
    # Validate before touching boto3 so that bad arguments fail fast,
    # without needing credentials or a network connection.
    variables = _validated_selection(
        variables, _HISTORICAL_VARIABLES, "variable"
    )
    forecast_type = _validated_selection(
        forecast_type, _HISTORICAL_FORECAST_TYPES, "forecast type"
    )

    import boto3

    session = boto3.Session(
        aws_access_key_id=s3_key,
        aws_secret_access_key=s3_secret,
    )
    s3 = session.resource("s3", endpoint_url=s3_entrypoint)
    bucket = s3.Bucket(s3_bucket)
    key_prefix = "weather_forecasts"
    list_files = []

    for var in variables:
        for forecast in forecast_type:
            key = f"{key_prefix}/{var}_{forecast}.nc"
            # Path joining (instead of string concatenation) keeps an empty
            # prefix relative and accepts Path objects as well as strings.
            filename = Path(prefix) / key
            if filename.exists():
                print(f"{filename} already downloaded, skipping")
                continue
            if dryrun:
                print(f"DRY RUN : would Download {key} to {filename}")
                # Test if the key exists without downloading it. This is a
                # best-effort check: any failure is reported, not raised.
                try:
                    s3.Object(s3_bucket, key).load()
                except Exception as e:
                    print(e)
            else:
                # Only create directories when a file is really written.
                filename.parent.mkdir(parents=True, exist_ok=True)
                # boto3 documents ``Filename`` as a str.
                bucket.download_file(key, str(filename))
            list_files.append(filename)
    return list_files