Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add back in pvnet ecmwf old model #141

Merged
merged 4 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pvnet_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def app(
- SENTRY_DSN, optional link to sentry
- ENVIRONMENT, the environment this is running in, defaults to local
- USE_ECMWF_ONLY, option to use ecmwf only model, defaults to false
- USE_OCF_DATA_SAMPLER, option to use ocf_data_sampler, defaults to true

Args:
t0 (datetime): Datetime at which forecast is made
Expand All @@ -127,18 +128,20 @@ def app(
use_day_ahead_model = os.getenv("DAY_AHEAD_MODEL", "false").lower() == "true"
use_ecmwf_only = os.getenv("USE_ECMWF_ONLY", "false").lower() == "true"
run_extra_models = os.getenv("RUN_EXTRA_MODELS", "false").lower() == "true"
use_ocf_data_sampler = os.getenv("USE_OCF_DATA_SAMPLER", "true").lower() == "true"

logger.info(f"Using `pvnet` library version: {pvnet.__version__}")
logger.info(f"Using `pvnet_app` library version: {pvnet_app.__version__}")
logger.info(f"Using {num_workers} workers")
logger.info(f"Using day ahead model: {use_day_ahead_model}")
logger.info(f"Using ecwmwf only: {use_ecmwf_only}")
logger.info(f"Using ecmwf only: {use_ecmwf_only}")
logger.info(f"Running extra models: {run_extra_models}")

# load models
model_configs = get_all_models(get_ecmwf_only=use_ecmwf_only,
get_day_ahead_only=use_day_ahead_model,
run_extra_models=run_extra_models)
run_extra_models=run_extra_models,
use_ocf_data_sampler=use_ocf_data_sampler)

logger.info(f"Using adjuster: {model_configs[0].use_adjuster}")
logger.info(f"Saving GSP sum: {model_configs[0].save_gsp_sum}")
Expand Down
24 changes: 24 additions & 0 deletions pvnet_app/model_configs/all_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ models:
version: 4fe6b1441b6dd549292c201ed85eee156ecc220c
ecmwf_only: True
uses_satellite_data: False
# These are the old (legacy) models for pvnet and pvnet_ecmwf, which do not use ocf_data_sampler
- name: pvnet_v2
pvnet:
repo: openclimatefix/pvnet_uk_region
version: 62e5e20ab793cee7cf94eadac870d2199501a730
summation:
repo: openclimatefix/pvnet_v2_summation
version: ffac655f9650b81865d96023baa15839f3ce26ec
use_adjuster: True
save_gsp_sum: False
verbose: True
save_gsp_to_recent: True
uses_ocf_data_sampler: False
- name: pvnet_ecmwf # this name is important as it is used for blending
pvnet:
repo: openclimatefix/pvnet_uk_region
version: 35d55181a82440bdd087f380d650bfd0b64bd322
summation:
repo: openclimatefix/pvnet_v2_summation
version: 9002baf1e9dc1ec141f3c4a1fa8447b6316a4558
ecmwf_only: True
uses_satellite_data: False
uses_ocf_data_sampler: False
# The day ahead model has not yet been re-trained with data-sampler.
# It will be run with the legacy dataloader using ocf_datapipes
- name: pvnet_day_ahead
Expand All @@ -61,4 +84,5 @@ models:
verbose: True
save_gsp_to_recent: True
day_ahead: True
uses_ocf_data_sampler: False

19 changes: 17 additions & 2 deletions pvnet_app/model_configs/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ class Model(BaseModel):
True, title="Uses Satellite Data", description="If this model uses satellite data"
)

uses_ocf_data_sampler: Optional[bool] = Field(
True, title="Uses OCF Data Sampler", description="If this model uses data sampler, old one uses ocf_datapipes"
)




class Models(BaseModel):
"""A group of ml models"""
Expand All @@ -60,8 +66,8 @@ class Models(BaseModel):
@field_validator("models")
@classmethod
def name_must_be_unique(cls, v: List[Model]) -> List[Model]:
"""Ensure that all model names are unique"""
names = [model.name for model in v]
"""Ensure that all model names are unique, respect to using ocf_data_sampler or not"""
names = [(model.name,model.uses_ocf_data_sampler) for model in v]
unique_names = set(names)

if len(names) != len(unique_names):
Expand All @@ -73,6 +79,7 @@ def get_all_models(
get_ecmwf_only: Optional[bool] = False,
get_day_ahead_only: Optional[bool] = False,
run_extra_models: Optional[bool] = False,
use_ocf_data_sampler: Optional[bool] = True,
) -> List[Model]:
"""
Returns all the models for a given client
Expand All @@ -81,6 +88,7 @@ def get_all_models(
get_ecmwf_only: If only the ECMWF model should be returned
get_day_ahead_only: If only the day ahead model should be returned
run_extra_models: If extra models should be run
use_ocf_data_sampler: If the OCF Data Sampler should be used
"""

# load models from yaml file
Expand All @@ -107,6 +115,13 @@ def get_all_models(
log.info("Not running extra models")
models.models = [model for model in models.models if model.name == "pvnet_v2"]

if use_ocf_data_sampler:
log.info("Using OCF Data Sampler")
models.models = [model for model in models.models if model.uses_ocf_data_sampler]
else:
log.info("Not using OCF Data Sampler, using ocf_datapipes")
models.models = [model for model in models.models if not model.uses_ocf_data_sampler]

return models.models


Expand Down
14 changes: 13 additions & 1 deletion tests/model_configs/test_pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_get_all_models_get_ecmwf_only():

def test_get_all_models_get_day_ahead_only():
"""Test for getting all models with ecmwf_only"""
models = get_all_models(get_day_ahead_only=True)
models = get_all_models(get_day_ahead_only=True, use_ocf_data_sampler=False)
assert len(models) == 1
assert models[0].day_ahead

Expand All @@ -28,3 +28,15 @@ def test_get_all_models_run_extra_models():
models = get_all_models(run_extra_models=True)
assert len(models) == 5


def test_get_all_models_ocf_data_sampler():
    """Test that `use_ocf_data_sampler` selects the matching group of models"""
    # Models flagged as using ocf_data_sampler
    models = get_all_models(use_ocf_data_sampler=True, run_extra_models=True)
    assert len(models) == 5
    # Counting alone would not notice if the wrong models were returned
    assert all(model.uses_ocf_data_sampler for model in models)

    # Models flagged as not using ocf_data_sampler (the legacy ones)
    models = get_all_models(use_ocf_data_sampler=False, run_extra_models=True)
    assert len(models) == 2
    assert not any(model.uses_ocf_data_sampler for model in models)

    # Day ahead model only, without ocf_data_sampler
    models = get_all_models(
        use_ocf_data_sampler=False, run_extra_models=True, get_day_ahead_only=True
    )
    assert len(models) == 1
    assert models[0].day_ahead
    assert not models[0].uses_ocf_data_sampler

74 changes: 72 additions & 2 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,15 @@ def test_app_day_ahead_model(

os.environ["DAY_AHEAD_MODEL"] = "True"
os.environ["RUN_EXTRA_MODELS"] = "False"
os.environ["USE_OCF_DATA_SAMPLER"] = "False"

# Run prediction
# This import needs to come after the environ vars have been set
from pvnet_app.app import app

app(gsp_ids=list(range(1, 318)), num_workers=2)

all_models = get_all_models(get_day_ahead_only=True)
all_models = get_all_models(get_day_ahead_only=True, use_ocf_data_sampler=False)

# Check correct number of forecasts have been made
# (317 GSPs + 1 National + maybe GSP-sum) = 318 or 319 forecasts
Expand Down Expand Up @@ -174,6 +175,7 @@ def test_app_no_sat(
os.environ["RUN_EXTRA_MODELS"] = "True"
os.environ["SAVE_GSP_SUM"] = "True"
os.environ["DAY_AHEAD_MODEL"] = "False"
os.environ["USE_OCF_DATA_SAMPLER"] = "True"

# Run prediction
# This import needs to come after the environ vars have been set
Expand Down Expand Up @@ -213,4 +215,72 @@ def test_app_no_sat(
expected_forecast_results += 317 * model_config.save_gsp_to_recent
expected_forecast_results += model_config.save_gsp_sum # optional Sum of GSPs

assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == expected_forecast_results * 16
assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == expected_forecast_results * 16


# Test legacy models
# It's nice to have this here, so we can run the latest version in production but still use the old models
# Once we have retrained the PVNet summation models we can remove this
def test_app_ocf_datapipes(
    db_session, nwp_ukv_data, nwp_ecmwf_data, sat_5_data, gsp_yields_and_systems, me_latest
):
    """Test the app running the legacy models, i.e. with USE_OCF_DATA_SAMPLER set to False

    The NWP and satellite fixtures are written into a temporary directory, the app is run
    from inside that directory, and the number of forecasts saved to the database is checked
    against the models returned by `get_all_models(use_ocf_data_sampler=False)`.
    """

    # Remember the starting directory so it can be restored afterwards.
    # The current directory may already have been removed (e.g. if something earlier changed
    # into a temporary directory that has since been deleted), in which case fall back to
    # the system temp dir.
    try:
        original_cwd = os.getcwd()
    except FileNotFoundError:
        original_cwd = tempfile.gettempdir()

    with tempfile.TemporaryDirectory() as tmpdirname:
        os.chdir(tmpdirname)

        try:
            # The paths below are relative, so they resolve inside the temporary directory
            temp_nwp_path = "temp_nwp_ukv.zarr"
            os.environ["NWP_UKV_ZARR_PATH"] = temp_nwp_path
            nwp_ukv_data.to_zarr(temp_nwp_path)

            temp_nwp_path = "temp_nwp_ecmwf.zarr"
            os.environ["NWP_ECMWF_ZARR_PATH"] = temp_nwp_path
            nwp_ecmwf_data.to_zarr(temp_nwp_path)

            temp_sat_path = "temp_sat.zarr.zip"
            os.environ["SATELLITE_ZARR_PATH"] = temp_sat_path
            with zarr.storage.ZipStore(temp_sat_path, mode="x") as store:
                sat_5_data.to_zarr(store)

            os.environ["DAY_AHEAD_MODEL"] = "False"
            os.environ["RUN_EXTRA_MODELS"] = "False"
            os.environ["USE_OCF_DATA_SAMPLER"] = "False"

            # Run prediction
            # This import needs to come after the environ vars have been set
            from pvnet_app.app import app

            app(gsp_ids=list(range(1, 318)), num_workers=2)
        finally:
            # Leave the temporary directory before it is deleted, so the process is not
            # left with a working directory that no longer exists
            os.chdir(original_cwd)

    all_models = get_all_models(use_ocf_data_sampler=False)

    # Check correct number of forecasts have been made
    # (317 GSPs + 1 National + maybe GSP-sum) = 318 or 319 forecasts
    # Forecast made with multiple models
    expected_forecast_results = 0
    for model_config in all_models:
        expected_forecast_results += 318 + model_config.save_gsp_sum

    forecasts = db_session.query(ForecastSQL).all()
    # Doubled for historic and forecast
    assert len(forecasts) == expected_forecast_results * 2

    # Check probabilistic added
    assert "90" in forecasts[0].forecast_values[0].properties
    assert "10" in forecasts[0].forecast_values[0].properties

    # Number of time steps expected in each forecast
    expected_forecast_timesteps = 16

    assert (
        len(db_session.query(ForecastValueSQL).all())
        == expected_forecast_results * expected_forecast_timesteps
    )
    assert (
        len(db_session.query(ForecastValueLatestSQL).all())
        == expected_forecast_results * expected_forecast_timesteps
    )
    assert (
        len(db_session.query(ForecastValueSevenDaysSQL).all())
        == expected_forecast_results * expected_forecast_timesteps
    )
Loading