From 180f3d0ea0eb4ff39c4ad5bba02a467d9ff92733 Mon Sep 17 00:00:00 2001 From: Antoine Tavant Date: Mon, 12 Aug 2024 19:16:54 +0200 Subject: [PATCH] Implement sklearn linear model for ENR prediction --- pyproject.toml | 2 +- src/energy_forecast/constants.py | 15 ++++ src/energy_forecast/eco2mix.py | 3 + src/energy_forecast/enr_production_model.py | 77 +++++++++++++++++++++ src/energy_forecast/meteo.py | 4 +- 5 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 src/energy_forecast/constants.py create mode 100644 src/energy_forecast/enr_production_model.py diff --git a/pyproject.toml b/pyproject.toml index 2a0a474..7050f1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,4 +97,4 @@ exclude_lines = [ ] [tool.ruff] -extend-include = ["*.ipynb"] \ No newline at end of file +extend-include = ["*.ipynb"] diff --git a/src/energy_forecast/constants.py b/src/energy_forecast/constants.py new file mode 100644 index 0000000..2ed22fb --- /dev/null +++ b/src/energy_forecast/constants.py @@ -0,0 +1,15 @@ + +region_names = ["Île-de-France", + "Centre-Val de Loire", + "Bourgogne-Franche-Comté", + "Normandie", + "Hauts-de-France", + "Grand Est", + "Pays de la Loire", + "Bretagne", + "Nouvelle-Aquitaine", + "Occitanie", + "Auvergne-Rhône-Alpes", + "Provence-Alpes-Côte d'Azur", + "Corse" + ] diff --git a/src/energy_forecast/eco2mix.py b/src/energy_forecast/eco2mix.py index 8ab0a3b..8ebc5f4 100644 --- a/src/energy_forecast/eco2mix.py +++ b/src/energy_forecast/eco2mix.py @@ -1,4 +1,7 @@ +"""Access the RTE eco2mix via ODRE API to get real-time data on the French electricity grid. +Needed mostly for the regional data, as the national data is available on the RTE API. 
+""" import logging import numpy as np diff --git a/src/energy_forecast/enr_production_model.py b/src/energy_forecast/enr_production_model.py new file mode 100644 index 0000000..36c272b --- /dev/null +++ b/src/energy_forecast/enr_production_model.py @@ -0,0 +1,77 @@ +from pathlib import Path +import pandas as pd +from sklearn import pipeline, linear_model +from energy_forecast import ROOT_DIR +from joblib import dump, load + +class ENRProductionModel: + """Model to predict the production of renewable energy sources. + + Implements a pair of linear regression models to predict the production of solar and wind energy + from France regions weather data. + + Parameters + ---------- + model_wind : sklearn.pipeline.Pipeline | None + Model to predict the wind energy production. + model_sun : sklearn.pipeline.Pipeline | None + Model to predict the solar energy production. + + Examples + -------- + >>> model = ENRProductionModel() + >>> model.fit(sun_flux, wind_speed, energy_data) + >>> predictions = model.predict(sun_flux, wind_speed) + >>> model.save("path/to/save") + """ + + def __init__(self, model_wind=None, model_sun=None) -> None: + self.model_wind = model_wind or pipeline.Pipeline([ + ("model", linear_model.LinearRegression(positive=True, fit_intercept=False)) + ]) + self.model_sun = model_sun or pipeline.Pipeline([ + ("model", linear_model.LinearRegression(positive=True, fit_intercept=False)) + ]) + + @staticmethod + def pre_process_sun_flux(sun_flux:pd.DataFrame) -> pd.DataFrame: + return sun_flux + + @staticmethod + def pre_process_wind_speed(wind_speed:pd.DataFrame) -> pd.DataFrame: + X_squared = wind_speed ** 2 + X_squared.columns = [f"{col}_squared" for col in X_squared.columns] + X_cubed = wind_speed ** 3 + X_cubed.columns = [f"{col}_cubed" for col in X_cubed.columns] + + wind_speed = pd.concat([wind_speed, X_squared, X_cubed], axis=1) + + return wind_speed + + def fit(self, sun_flux:pd.DataFrame, wind_speed:pd.DataFrame, productions:pd.DataFrame) -> None: + 
wind_speed_preprocessed = self.pre_process_wind_speed(wind_speed) + self.model_wind.fit(wind_speed_preprocessed, productions["wind"]) + sun_flux_preprocessed = self.pre_process_sun_flux(sun_flux) + self.model_sun.fit(sun_flux_preprocessed, productions["sun"]) + + def predict(self, sun_flux:pd.DataFrame, wind_speed:pd.DataFrame) -> pd.DataFrame: + self.predictions = pd.DataFrame() + wind_speed_preprocessed = self.pre_process_wind_speed(wind_speed) + self.predictions["wind"] = self.model_wind.predict(wind_speed_preprocessed) + sun_flux_preprocessed = self.pre_process_sun_flux(sun_flux) + self.predictions["sun"] = self.model_sun.predict(sun_flux_preprocessed) + return self.predictions + + def save(self, path:str | Path | None=None) -> None: + path = path or ROOT_DIR / "data" / "production_prediction" + path = Path(path) + path.mkdir(parents=True, exist_ok=True) + dump(self, path / "model.pkl") + + @classmethod + def load(cls, path:str | Path | None=None) -> "ENRProductionModel": + path = path or ROOT_DIR / "data" / "production_prediction" + path = Path(path) + instance = load(path / "model.pkl") + return instance + diff --git a/src/energy_forecast/meteo.py b/src/energy_forecast/meteo.py index 243cc20..4310229 100644 --- a/src/energy_forecast/meteo.py +++ b/src/energy_forecast/meteo.py @@ -377,6 +377,8 @@ def warm_cache(logger, date=None, max_counter=30, sleep_duration=600): if counter > max_counter: raise TimeoutError("Max counter reached") + + if __name__ == "__main__": logger.info("Fetching data for today") - warm_cache(logger) \ No newline at end of file + warm_cache(logger)