From 855db76959bfdd3ca0028b329eb7b527cb092324 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 15 Oct 2024 11:28:01 +0100 Subject: [PATCH 1/4] add back in pvnet ecmwf old model --- pvnet_app/app.py | 7 +++++-- pvnet_app/model_configs/all_models.yaml | 12 +++++++++++ pvnet_app/model_configs/pydantic_models.py | 23 +++++++++++++++++++-- tests/model_configs/test_pydantic_models.py | 14 ++++++++++++- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/pvnet_app/app.py b/pvnet_app/app.py index 802c0c8..960aced 100644 --- a/pvnet_app/app.py +++ b/pvnet_app/app.py @@ -107,6 +107,7 @@ def app( - SENTRY_DSN, optional link to sentry - ENVIRONMENT, the environment this is running in, defaults to local - USE_ECMWF_ONLY, option to use ecmwf only model, defaults to false + - USE_OCF_DATA_SAMPLER, option to use ocf_data_sampler, defaults to true Args: t0 (datetime): Datetime at which forecast is made @@ -127,18 +128,20 @@ def app( use_day_ahead_model = os.getenv("DAY_AHEAD_MODEL", "false").lower() == "true" use_ecmwf_only = os.getenv("USE_ECMWF_ONLY", "false").lower() == "true" run_extra_models = os.getenv("RUN_EXTRA_MODELS", "false").lower() == "true" + use_ocf_data_sampler = os.getenv("USE_OCF_DATA_SAMPLER", "true").lower() == "true" logger.info(f"Using `pvnet` library version: {pvnet.__version__}") logger.info(f"Using `pvnet_app` library version: {pvnet_app.__version__}") logger.info(f"Using {num_workers} workers") logger.info(f"Using day ahead model: {use_day_ahead_model}") - logger.info(f"Using ecwmwf only: {use_ecmwf_only}") + logger.info(f"Using ecmwf only: {use_ecmwf_only}") logger.info(f"Running extra models: {run_extra_models}") # load models model_configs = get_all_models(get_ecmwf_only=use_ecmwf_only, get_day_ahead_only=use_day_ahead_model, - run_extra_models=run_extra_models) + run_extra_models=run_extra_models, + use_ocf_data_sampler=use_ocf_data_sampler) logger.info(f"Using adjuster: {model_configs[0].use_adjuster}") logger.info(f"Saving GSP sum: {model_configs[0].save_gsp_sum}") diff --git a/pvnet_app/model_configs/all_models.yaml b/pvnet_app/model_configs/all_models.yaml index 6d81057..988b4e5 100644 --- a/pvnet_app/model_configs/all_models.yaml +++ b/pvnet_app/model_configs/all_models.yaml @@ -47,6 +47,17 @@ models: version: 4fe6b1441b6dd549292c201ed85eee156ecc220c ecmwf_only: True uses_satellite_data: False +# This is the old model for pvnet_ecmwf + - name: pvnet_ecmwf # this name is important as it used for blending + pvnet: + repo: openclimatefix/pvnet_uk_region + version: 35d55181a82440bdd087f380d650bfd0b64bd322 + summation: + repo: openclimatefix/pvnet_v2_summation + version: 9002baf1e9dc1ec141f3c4a1fa8447b6316a4558 + ecmwf_only: True + uses_satellite_data: False + uses_ocf_data_sampler: False # The day ahead model has not yet been re-trained with data-sampler. # It will be run with the legacy dataloader using ocf_datapipes - name: pvnet_day_ahead @@ -61,4 +72,5 @@ models: verbose: True save_gsp_to_recent: True day_ahead: True + uses_ocf_data_sampler: False diff --git a/pvnet_app/model_configs/pydantic_models.py b/pvnet_app/model_configs/pydantic_models.py index 3f3635b..e5e0a08 100644 --- a/pvnet_app/model_configs/pydantic_models.py +++ b/pvnet_app/model_configs/pydantic_models.py @@ -49,6 +49,12 @@ class Model(BaseModel): True, title="Uses Satellite Data", description="If this model uses satellite data" ) + uses_ocf_data_sampler: Optional[bool] = Field( + True, title="Uses OCF Data Sampler", description="If this model uses data sampler, old one uses ocf_datapipes" + ) + + + class Models(BaseModel): """A group of ml models""" @@ -60,8 +66,8 @@ class Models(BaseModel): @field_validator("models") @classmethod def name_must_be_unique(cls, v: List[Model]) -> List[Model]: - """Ensure that all model names are unique""" - names = [model.name for model in v] + """Ensure that all model names are unique, respect to using ocf_data_sampler or not""" + names = [(model.name,model.uses_ocf_data_sampler) for model in v] unique_names = set(names) if len(names) != len(unique_names): @@ -73,6 +79,7 @@ def get_all_models( get_ecmwf_only: Optional[bool] = False, get_day_ahead_only: Optional[bool] = False, run_extra_models: Optional[bool] = False, + use_ocf_data_sampler: Optional[bool] = True, ) -> List[Model]: """ Returns all the models for a given client @@ -81,6 +88,7 @@ def get_all_models( get_ecmwf_only: If only the ECMWF model should be returned get_day_ahead_only: If only the day ahead model should be returned run_extra_models: If extra models should be run + use_ocf_data_sampler: If the OCF Data Sampler should be used """ # load models from yaml file @@ -92,10 +100,12 @@ def get_all_models( models = config_pvnet_v2_model(models) + print(len(models.models)) if get_ecmwf_only: log.info("Using ECMWF model only") models.models = [model for model in models.models if model.ecmwf_only] + print(len(models.models)) if get_day_ahead_only: log.info("Using Day Ahead model only") models.models = [model for model in models.models if model.day_ahead] @@ -103,10 +113,19 @@ def get_all_models( log.info("Not using Day Ahead model") models.models = [model for model in models.models if not model.day_ahead] + print(len(models.models)) if not run_extra_models and not get_day_ahead_only and not get_ecmwf_only: log.info("Not running extra models") models.models = [model for model in models.models if model.name == "pvnet_v2"] + print(len(models.models)) + if use_ocf_data_sampler: + log.info("Using OCF Data Sampler") + models.models = [model for model in models.models if model.uses_ocf_data_sampler] + else: + log.info("Not using OCF Data Sampler, using ocf_datapipes") + models.models = [model for model in models.models if not model.uses_ocf_data_sampler] + return models.models diff --git a/tests/model_configs/test_pydantic_models.py b/tests/model_configs/test_pydantic_models.py index 585a55e..e21f23a 100644 --- a/tests/model_configs/test_pydantic_models.py +++ b/tests/model_configs/test_pydantic_models.py @@ -18,7 +18,7 @@ def test_get_all_models_get_ecmwf_only(): def test_get_all_models_get_day_ahead_only(): """Test for getting all models with ecmwf_only""" - models = get_all_models(get_day_ahead_only=True) + models = get_all_models(get_day_ahead_only=True, use_ocf_data_sampler=False) assert len(models) == 1 assert models[0].day_ahead @@ -28,3 +28,15 @@ def test_get_all_models_run_extra_models(): models = get_all_models(run_extra_models=True) assert len(models) == 5 + +def test_get_all_models_ocf_data_sampler(): + """Test for getting all models with ecmwf_only""" + models = get_all_models(use_ocf_data_sampler=True, run_extra_models=True) + assert len(models) == 5 + + models = get_all_models(use_ocf_data_sampler=False, run_extra_models=True) + assert len(models) == 1 + + models = get_all_models(use_ocf_data_sampler=False, run_extra_models=True, get_day_ahead_only=True) + assert len(models) == 1 + From 0e673ea391c7b59dc1346a50ddd7dabb8819186e Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 15 Oct 2024 11:36:20 +0100 Subject: [PATCH 2/4] add old pvnet main model back in. Add test to check it runs --- pvnet_app/model_configs/all_models.yaml | 14 +++- tests/model_configs/test_pydantic_models.py | 2 +- tests/test_app.py | 73 ++++++++++++++++++++- 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/pvnet_app/model_configs/all_models.yaml b/pvnet_app/model_configs/all_models.yaml index 988b4e5..7d87a2f 100644 --- a/pvnet_app/model_configs/all_models.yaml +++ b/pvnet_app/model_configs/all_models.yaml @@ -47,7 +47,19 @@ models: version: 4fe6b1441b6dd549292c201ed85eee156ecc220c ecmwf_only: True uses_satellite_data: False -# This is the old model for pvnet_ecmwf +# This is the old model for pvnet and pvnet_ecmwf + - name: pvnet_v2 + pvnet: + repo: openclimatefix/pvnet_uk_region + version: 62e5e20ab793cee7cf94eadac870d2199501a730 + summation: + repo: openclimatefix/pvnet_v2_summation + version: ffac655f9650b81865d96023baa15839f3ce26ec + use_adjuster: True + save_gsp_sum: False + verbose: True + save_gsp_to_recent: True + uses_ocf_data_sampler: False - name: pvnet_ecmwf # this name is important as it used for blending pvnet: repo: openclimatefix/pvnet_uk_region diff --git a/tests/model_configs/test_pydantic_models.py b/tests/model_configs/test_pydantic_models.py index e21f23a..327a3e3 100644 --- a/tests/model_configs/test_pydantic_models.py +++ b/tests/model_configs/test_pydantic_models.py @@ -35,7 +35,7 @@ def test_get_all_models_ocf_data_sampler(): assert len(models) == 5 models = get_all_models(use_ocf_data_sampler=False, run_extra_models=True) - assert len(models) == 1 + assert len(models) == 2 models = get_all_models(use_ocf_data_sampler=False, run_extra_models=True, get_day_ahead_only=True) assert len(models) == 1 diff --git a/tests/test_app.py b/tests/test_app.py index 797a3cd..3746c47 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -111,6 +111,7 @@ def test_app_day_ahead_model( os.environ["DAY_AHEAD_MODEL"] = "True" os.environ["RUN_EXTRA_MODELS"] = "False" + os.environ["USE_OCF_DATA_SAMPLER"] = "False" # Run prediction # Thes import needs to come after the environ vars have been set @@ -118,7 +119,7 @@ def test_app_day_ahead_model( app(gsp_ids=list(range(1, 318)), num_workers=2) - all_models = get_all_models(get_day_ahead_only=True) + all_models = get_all_models(get_day_ahead_only=True, use_ocf_data_sampler=False) # Check correct number of forecasts have been made # (317 GSPs + 1 National + maybe GSP-sum) = 318 or 319 forecasts @@ -213,4 +214,72 @@ def test_app_no_sat( expected_forecast_results += 317 * model_config.save_gsp_to_recent expected_forecast_results += model_config.save_gsp_sum # optional Sum of GSPs - assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == expected_forecast_results * 16 \ No newline at end of file + assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == expected_forecast_results * 16 + + +# test legacy models +# Its nice to have this here, so we can run the latest version in production, but still use the old models +# Once we have re trained PVnet summation models we can remove this +def test_app_ocf_datapipes( + db_session, nwp_ukv_data, nwp_ecmwf_data, sat_5_data, gsp_yields_and_systems, me_latest +): + """Test the app running the day ahead model""" + + with tempfile.TemporaryDirectory() as tmpdirname: + os.chdir(tmpdirname) + + temp_nwp_path = "temp_nwp_ukv.zarr" + os.environ["NWP_UKV_ZARR_PATH"] = temp_nwp_path + nwp_ukv_data.to_zarr(temp_nwp_path) + + temp_nwp_path = "temp_nwp_ecmwf.zarr" + os.environ["NWP_ECMWF_ZARR_PATH"] = temp_nwp_path + nwp_ecmwf_data.to_zarr(temp_nwp_path) + + temp_sat_path = "temp_sat.zarr.zip" + os.environ["SATELLITE_ZARR_PATH"] = temp_sat_path + with zarr.storage.ZipStore(temp_sat_path, mode="x") as store: + sat_5_data.to_zarr(store) + + os.environ["DAY_AHEAD_MODEL"] = "False" + os.environ["RUN_EXTRA_MODELS"] = "False" + os.environ["USE_OCF_DATA_SAMPLER"] = "False" + + # Run prediction + # Thes import needs to come after the environ vars have been set + from pvnet_app.app import app + + app(gsp_ids=list(range(1, 318)), num_workers=2) + + all_models = get_all_models(use_ocf_data_sampler=False) + + # Check correct number of forecasts have been made + # (317 GSPs + 1 National + maybe GSP-sum) = 318 or 319 forecasts + # Forecast made with multiple models + expected_forecast_results = 0 + for model_config in all_models: + expected_forecast_results += 318 + model_config.save_gsp_sum + + forecasts = db_session.query(ForecastSQL).all() + # Doubled for historic and forecast + assert len(forecasts) == expected_forecast_results * 2 + + # Check probabilistic added + assert "90" in forecasts[0].forecast_values[0].properties + assert "10" in forecasts[0].forecast_values[0].properties + + # 72 time steps in forecast + expected_forecast_timesteps = 72 + + assert ( + len(db_session.query(ForecastValueSQL).all()) + == expected_forecast_results * expected_forecast_timesteps + ) + assert ( + len(db_session.query(ForecastValueLatestSQL).all()) + == expected_forecast_results * expected_forecast_timesteps + ) + assert ( + len(db_session.query(ForecastValueSevenDaysSQL).all()) + == expected_forecast_results * expected_forecast_timesteps + ) \ No newline at end of file From d5c24076815fd829badb8ff38ba22603a0bc2277 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 15 Oct 2024 12:06:42 +0100 Subject: [PATCH 3/4] fix for legacy test --- tests/test_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_app.py b/tests/test_app.py index 3746c47..9b789b2 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -269,7 +269,7 @@ def test_app_ocf_datapipes( assert "10" in forecasts[0].forecast_values[0].properties # 72 time steps in forecast - expected_forecast_timesteps = 72 + expected_forecast_timesteps = 16 assert ( len(db_session.query(ForecastValueSQL).all()) From 1b2a45a567bead16a87821452392d37678eda339 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 15 Oct 2024 12:42:02 +0100 Subject: [PATCH 4/4] fix test --- pvnet_app/model_configs/pydantic_models.py | 4 ---- tests/test_app.py | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pvnet_app/model_configs/pydantic_models.py b/pvnet_app/model_configs/pydantic_models.py index e5e0a08..8f37bf1 100644 --- a/pvnet_app/model_configs/pydantic_models.py +++ b/pvnet_app/model_configs/pydantic_models.py @@ -100,12 +100,10 @@ def get_all_models( models = config_pvnet_v2_model(models) - print(len(models.models)) if get_ecmwf_only: log.info("Using ECMWF model only") models.models = [model for model in models.models if model.ecmwf_only] - print(len(models.models)) if get_day_ahead_only: log.info("Using Day Ahead model only") models.models = [model for model in models.models if model.day_ahead] @@ -113,12 +111,10 @@ def get_all_models( log.info("Not using Day Ahead model") models.models = [model for model in models.models if not model.day_ahead] - print(len(models.models)) if not run_extra_models and not get_day_ahead_only and not get_ecmwf_only: log.info("Not running extra models") models.models = [model for model in models.models if model.name == "pvnet_v2"] - print(len(models.models)) if use_ocf_data_sampler: log.info("Using OCF Data Sampler") models.models = [model for model in models.models if model.uses_ocf_data_sampler] diff --git a/tests/test_app.py b/tests/test_app.py index 9b789b2..3c9345f 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -175,6 +175,7 @@ def test_app_no_sat( os.environ["RUN_EXTRA_MODELS"] = "True" os.environ["SAVE_GSP_SUM"] = "True" os.environ["DAY_AHEAD_MODEL"] = "False" + os.environ["USE_OCF_DATA_SAMPLER"] = "True" # Run prediction # Thes import needs to come after the environ vars have been set