From 35b9dd12cd3313999de36f4e51d1e986b76a2efc Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 29 Jul 2024 16:48:45 +0100 Subject: [PATCH 01/90] Add multiple models to rose edit configuration Also gone through and removed some unneeded things, added titles, etc. Still need to go through the diagnostics. Split diagnostics into sections, add input variable table Also link in more models, so 10 are now supported. Improvements to rose edit metadata Fix a bunch of validation issues --- cset-workflow/flow.cylc | 64 +- .../plot_spatial_surface_model_field.cylc | 8 +- cset-workflow/meta/rose-meta.conf | 1424 +++++++++++++++-- 3 files changed, 1280 insertions(+), 216 deletions(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index d47328ec0..271f2f135 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -4,44 +4,30 @@ title = CSET description = Workflow for running CSET. URL = https://metoffice.github.io/CSET -[scheduler] - UTC mode = True - [scheduling] runahead limit = P{{CSET_RUNAHEAD_LIMIT}} - initial cycle point = {{CSET_INITIAL_CYCLE_POINT}} - final cycle point = {{CSET_FINAL_CYCLE_POINT}} + initial cycle point = 1000-01-01 [[graph]] # Only runs on the first cycle. R1/^ = """ - build_conda & install_website_skeleton => FETCH_DATA - """ - - # Only runs on the final cycle. - R1/$ = """ - process_finish => COLLATE:succeed-all => - finish_website => send_email => housekeeping_full + build_conda => install_local_cset + build_conda => install_website_skeleton + install_local_cset & install_website_skeleton => setup_complete """ - # Runs every cycle to process the data in parallel. - {{CSET_CYCLE_PERIOD}} = """ - install_website_skeleton[^] & build_conda[^] => - FETCH_DATA:succeed-all => PARALLEL:succeed-all => - process_finish => housekeeping_raw - - # Intercycle dependence with this task ensures the collate step waits for - # the required data. - process_finish[-{{CSET_CYCLE_PERIOD}}] => process_finish + # Runs for every forecast initiation time to process the data in parallel. + {% for date in CSET_CASE_DATES %} + R1/{{date}} = """ + setup_complete[^] => FETCH_DATA:succeed-all => PROCESS:finish-all => process_finish => housekeeping_raw """ + {% endfor %} - {% if CSET_INCREMENTAL_OUTPUT %} - # Runs every so often to update output plots during runtime. - {{CSET_INCREMENTAL_OUTPUT_PERIOD}} = """ - COLLATE[-{{CSET_INCREMENTAL_OUTPUT_PERIOD}}]:finish-all & - process_finish => COLLATE + # Only runs on the final cycle. + R1/$ = """ + PROCESS:finish-all => finish_website => send_email + PROCESS:finish-all => housekeeping_full """ - {% endif %} [runtime] [[root]] @@ -74,15 +60,8 @@ URL = https://metoffice.github.io/CSET COLORBAR_FILE = {{COLORBAR_FILE}} PLOT_RESOLUTION = {{PLOT_RESOLUTION}} - [[PARALLEL]] + [[PROCESS]] script = rose task-run -v --app-key=run_cset_recipe - [[[environment]]] - CSET_BAKE_MODE = parallel - - [[COLLATE]] - script = rose task-run -v --app-key=run_cset_recipe - [[[environment]]] - CSET_BAKE_MODE = collate [[FETCH_DATA]] @@ -95,20 +74,13 @@ URL = https://metoffice.github.io/CSET ROSE_APP_OPT_CONF_KEYS = {{METPLUS_OPT_CONFIG_KEYS}} {% endif %} - # Dummy tasks needed for workflow scheduling. 
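    # A dummy task is a no-op: it runs `true` on localhost and finishes within
    # its PT1M time limit, existing only to give the graph a single node to
    # hang dependencies off. For example, setup_complete lets every per-case
    # FETCH_DATA task wait on one upstream task rather than repeating the
    # initial setup dependencies in each cycle's graph branch.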
- [[process_finish]] + [[DUMMY_TASK]] script = true platform = localhost + execution time limit = PT1M - [[dummy_collate]] - inherit = COLLATE - script = true - platform = localhost - - [[dummy_parallel]] - inherit = PARALLEL - script = true - platform = localhost + [[setup_complete]] + inherit = DUMMY_TASK [[build_conda]] # Create the conda environment if it does not yet exist, possibly installing diff --git a/cset-workflow/includes/plot_spatial_surface_model_field.cylc b/cset-workflow/includes/plot_spatial_surface_model_field.cylc index 14cc5e51e..aa4bc11db 100644 --- a/cset-workflow/includes/plot_spatial_surface_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_surface_model_field.cylc @@ -2,13 +2,7 @@ {% for model_field in SURFACE_MODEL_FIELDS %} [runtime] [[generic_spatial_plot_time_series_{{model_field}}]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}}" - - [[generic_spatial_plot_time_series_collation_{{model_field}}]] - inherit = COLLATE + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml" CSET_ADDOPTS = "--VARNAME={{model_field}}" diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index e5bfbd9cb..96e77de43 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -1,4 +1,4 @@ -# Workflow's configuration metadata +# Workflow’s configuration metadata [template variables] ################################################################################ @@ -7,6 +7,7 @@ [template variables=SITE] ns=Environment +title=Site description=Which institution to load the site-specific configuration for. help=The site-specific configuration should live in a file under site/ For example the Met Office configuration lives under "site/metoffice.cylc". @@ -30,6 +31,7 @@ sort-key=modules1 [template variables=MODULES_PURGE] ns=Environment +title=Purge modules description=Purge modules that are not explicitly specified. help=Remove any modules that are loaded by default before loading the specified ones. This is recommended as it makes your workflows more explicit and less @@ -58,6 +60,7 @@ sort-key=conda1 [template variables=CONDA_PATH] ns=Environment +title=Conda executable path description=Where to get conda from. Leave blank if conda already on path. help=Path where the conda executable is. This should be to a directory, rather to a binary itself. Leave this field blank if conda comes from another @@ -67,6 +70,7 @@ sort-key=conda2 [template variables=CONDA_VENV_CREATE] ns=Environment +title=Create conda environment description=Whether to (re)create the conda environment. help=When enabled it will check that the conda environment exists and is up-to-date, recreating it otherwise. @@ -75,15 +79,21 @@ sort-key=conda2 [template variables=CONDA_VENV_LOCATION] ns=Environment +title=Conda environment location description=Location of the conda virtual environment. -help=The can be found by running `conda env list` and copying the path displayed - there. If unspecified it defaults to the workflow share directory. +help=Existing environments can be found by running `conda info --envs` and + copying the path displayed there. If unspecified it defaults to the workflow + share directory. + + If creating the conda environment as part of the workflow, it will only be + created if it does not exist or is out of date. 
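    An illustrative example value (not a default): ~/.conda/envs/cset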
type=quoted sort-key=conda2 [template variables=CSET_ENV_USE_LOCAL_CSET] ns=Environment -description=Whether to use a local version of CSET +title=Use local CSET +description=Whether to use a local version of CSET, rather than the bundled one. help=Use a local version of CSET, rather than the version provided from conda. This is especially useful for running development versions. type=python_boolean @@ -93,6 +103,7 @@ sort-key=localcset1 [template variables=CSET_LOCAL_CSET_PATH] ns=Environment +title=Local CSET path description=Path to local copy of CSET. help=Path to either a wheel file, or a checked out copy of the CSET git repo. For the repository, it should be the path to the directory containing the @@ -142,14 +153,17 @@ sort-key=met2 [template variables=LOGLEVEL] ns=General +title=Logging level description=How detailed the logs should be. -help=INFO only shows a general overview of what is happening, while DEBUG enables - in-operator logging, but produces a lot of spurious output. +help=INFO only shows a general overview of what is happening, while DEBUG + enables in-operator logging, but produces a lot of output. values="INFO", "DEBUG" +value-titles=Info, Debug compulsory=true [template variables=HOUSEKEEPING_MODE] ns=General +title=Housekeeping mode description=How much housekeeping deletes. help=None will delete nothing. @@ -167,7 +181,7 @@ ns=General description=Filepath and name for colorbar details of each variable i.e. name_of_filepath/name_of_filename. An example file is available under CSET/cset-workflow/extra-meta/colorbar_dict_alphabetical.json -help= +help=TODO type=quoted compulsory=true @@ -192,169 +206,1292 @@ sort-key=web2 [template variables=WEB_ADDR] ns=General +title=Website address description=The address at which the website is served. help=This should be the address where your public_html or equivalent is served. It might include a partial path, such as your username. + + E.g: https://example.com/~username/ type=quoted compulsory=true sort-key=web1 +[template variables=WEB_DIR] +ns=General +title=Web directory +description=Path to directory that is served by the webserver. +help=This will probably be under $HOME/public_html or similar. You will want to + ensure a subfolder is used as it will be replaced with a symbolic link. + + This is where the output of the workflow will be accessible from, through a + symlink to the workflow shared directory. Anything existing at the path will + be removed. + + E.g: $HOME/public_html/CSET +type=quoted +compulsory=true +sort-key=web2 + [template variables=CSET_RUNAHEAD_LIMIT] ns=General +title=Concurrent cycle limit description=Number of simultaneous cycles. help=The maximum number of cycles run in parallel. A larger number here will finish quicker, but utilise more compute resources at once. For a large enough workflow it may overwhelm the batch submission system, so it is - recommended to keep this below 10. Must be positive. + recommended to keep this below 10. type=integer +range=0: compulsory=true ################################################################################ -# Data and Cycling +# Models and Cases ################################################################################ -[template variables=CSET_INITIAL_CYCLE_POINT] -ns=Data and Cycling -description=Datetime of initial cycle point. -help=This should be an ISO 8601 datetime string indicating when you want the - cycling component of CSET to begin. E.g. 2000-01-01T00:00Z. Ensure that it - is consistent with your data's starting validity time. 
If not then a warning - that cubes can not be loaded is raised. +[template variables=CSET_CYCLING_MODE] +ns=Models and Cases +title=Cycling mode +description=Process case studies, or a continuous trial. +values="Case Study", "Trial" +trigger=template variables=CSET_CASE_DATES: "Case Study"; + template variables=CSET_TRIAL_START_DATE: "Trial"; + template variables=CSET_TRIAL_END_DATE: "Trial"; + template variables=CSET_TRIAL_CYCLE_PERIOD: "Trial"; +compulsory=true +sort-key=a1 + +# Case study settings. +[template variables=CSET_CASE_DATES] +ns=Models and Cases +title=Case study dates +description=List of datetimes of cases. +help=This should be a python list of ISO 8601 datetime strings indicating the + forecast initiation time (AKA data time) of the data. E.g. 2000-01-01T00:00Z + Ensure that it is consistent with your data’s first validity time. If not + then a warning that cubes can not be loaded is raised. +type=python_list +compulsory=true +sort-key=b1 + +# Trials settings. +[template variables=CSET_TRIAL_START_DATE] +ns=Models and Cases +title=Trial start date +description=Start date of the trial. +help=The start date of the trial, in ISO 8601 format. This is the first date + that the workflow will run from. For example: 2000-01-01T00:00Z +type=quoted +compulsory=true +sort-key=c1 + +[template variables=CSET_TRIAL_END_DATE] +ns=Models and Cases +title=Trial end date +description=End date of the trial. If blank, the workflow will run indefinitely. +help=The end date of the trial, in ISO 8601 format. This is the last date that + the workflow will run to. If blank, the workflow will run indefinitely. + For example: 2000-01-01T00:00Z +type=quoted +compulsory=true +sort-key=c2 + +[template variables=CSET_TRIAL_CYCLE_PERIOD] +ns=Models and Cases +title=Trial cycle period +description=The period between model runs of the trial. +help=The cycle period of the trial, as an ISO 8601 duration. This is the time + between each forecast initiation of the model. For example: PT48H +type=quoted +compulsory=true +sort-key=c3 + +[template variables=CSET_ANALYSIS_PERIOD] +ns=Models and Cases +title=Analysis period +description=The length of forecast to analyse, AKA forecast length. +help=The period over which to produce diagnostics for the forecast, in ISO 8601 duration format. + For example: PT48H + + This controls how much data is fetched and fed into CSET for analysis. type=quoted compulsory=true -sort-key=cycle1 +sort-key=d1 -[template variables=CSET_FINAL_CYCLE_POINT] -ns=Data and Cycling -description=Datetime of final cycle point. -help=This should be an ISO 8601 datetime string indicating when you want the - cycling component of CSET to finish. E.g. 2000-01-01T00:00Z +[template variables=CSET_ANALYSIS_OFFSET] +ns=Models and Cases +title=Analysis offset +description=Offset from forecast initiation to verification start. +help=The offset in time between the forecast initiation and the start of the + analysis period. This is useful when needed fields are not output on the + first time step. For example: PT1H type=quoted compulsory=true -sort-key=cycle2 +sort-key=d2 -[template variables=CSET_CYCLE_PERIOD] -ns=Data and Cycling -description=Period between each output. -help=ISO 8601 duration for the length of the data in each cycle. For 3-hourly - data this would be PT3H. For hourly data choose PT1H. +[template variables=CSET_MODEL_COUNT] +ns=Models and Cases +title=Number of models +description=Number of models to evaluate. +help=The number of models to evaluate, between 1 and 10. 
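    Each enabled model is configured in its own Model NN section, revealed by
    the trigger list below; for example, a count of 2 exposes the Model 01 and
    Model 02 settings.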
If more models + are needed, it is straightforward to change upon request. +type=integer +range=1:10 +compulsory=true +sort-key=e1 +# Need to link to all of the model settings here. +trigger=template variables=m01_data_source: this >= 1; + template variables=m01_data_path: this >= 1; + template variables=m01_date_type: this >= 1; + template variables=m01_data_period: this >= 1; + template variables=m01_preprocessing: this >= 1; + template variables=m01_preprocessing_recipe: this >= 1; + + template variables=m02_data_source: this >= 2; + template variables=m02_data_path: this >= 2; + template variables=m02_date_type: this >= 2; + template variables=m02_data_period: this >= 2; + template variables=m02_preprocessing: this >= 2; + template variables=m02_preprocessing_recipe: this >= 2; + + template variables=m03_data_source: this >= 3; + template variables=m03_data_path: this >= 3; + template variables=m03_date_type: this >= 3; + template variables=m03_data_period: this >= 3; + template variables=m03_preprocessing: this >= 3; + template variables=m03_preprocessing_recipe: this >= 3; + + template variables=m04_data_source: this >= 4; + template variables=m04_data_path: this >= 4; + template variables=m04_date_type: this >= 4; + template variables=m04_data_period: this >= 4; + template variables=m04_preprocessing: this >= 4; + template variables=m04_preprocessing_recipe: this >= 4; + + template variables=m05_data_source: this >= 5; + template variables=m05_data_path: this >= 5; + template variables=m05_date_type: this >= 5; + template variables=m05_data_period: this >= 5; + template variables=m05_preprocessing: this >= 5; + template variables=m05_preprocessing_recipe: this >= 5; + + template variables=m06_data_source: this >= 6; + template variables=m06_data_path: this >= 6; + template variables=m06_date_type: this >= 6; + template variables=m06_data_period: this >= 6; + template variables=m06_preprocessing: this >= 6; + template variables=m06_preprocessing_recipe: this >= 6; + + template variables=m07_data_source: this >= 7; + template variables=m07_data_path: this >= 7; + template variables=m07_date_type: this >= 7; + template variables=m07_data_period: this >= 7; + template variables=m07_preprocessing: this >= 7; + template variables=m07_preprocessing_recipe: this >= 7; + + template variables=m08_data_source: this >= 8; + template variables=m08_data_path: this >= 8; + template variables=m08_date_type: this >= 8; + template variables=m08_data_period: this >= 8; + template variables=m08_preprocessing: this >= 8; + template variables=m08_preprocessing_recipe: this >= 8; + + template variables=m09_data_source: this >= 9; + template variables=m09_data_path: this >= 9; + template variables=m09_date_type: this >= 9; + template variables=m09_data_period: this >= 9; + template variables=m09_preprocessing: this >= 9; + template variables=m09_preprocessing_recipe: this >= 9; + + template variables=m10_data_source: this >= 10; + template variables=m10_data_path: this >= 10; + template variables=m10_date_type: this >= 10; + template variables=m10_data_period: this >= 10; + template variables=m10_preprocessing: this >= 10; + template variables=m10_preprocessing_recipe: this >= 10; + + +################################################################################ +# Model definitions. Repeat section for each desired model. +################################################################################ + +# # Model ?? +# [template variables=m??_data_source] +# ns=Models and Cases/Model ?? 
+# title=Data source +# description=From whence to retrieve the forecast. +# help=System from which to retrieve the forecast data. Filesystem is generic, +# while the others are site-specific. +# values="filesystem", "mass" +# value-titles=Filesystem, MASS +# compulsory=true +# sort-key=a1 + +# [template variables=m??_data_path] +# ns=Models and Cases/Model ?? +# title=Data path +# description=The path to the forecast. +# help=Full path (including file name) to the forecast data on your chosen storage +# system. Can contain wildcards. No quotation marks required in rose edit, as +# it is already quoted there. +# +# strftime format strings are supported, and will be replaced with the +# desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` +# +# For forecast lead time, %N can be used which is replaced by the 3 digit +# lead time in hours. The regular placeholders are still usable, and refer +# to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` +# +# Commonly useful placeholders: +# %Y: Year, e.g: 2024 +# %m: Month, e.g: 12 +# %d: Day, e.g: 31 +# %H: Hour, e.g: 23 +# %M: Minute, e.g: 59 +# %N: Forecast lead time in hours, e.g: 012 +# +# For more on strftime format strings, see: https://strftime.org +# type=quoted +# compulsory=true +# sort-key=a2 + +# [template variables=m??_date_type] +# ns=Models and Cases/Model ?? +# title=Date placeholder type +# description=Type of date templated into the data path. +# help=The type of date templated into the data path. Affects interpretation of +# strftime placeholders in the data path. + +# Validity time is when the data is predicting for. + +# Forecast initiation time, AKA data time, is the time of the validity time of +# the first data point. For realtime forecasts this is approximately when the +# forecast was started. + +# Forecast lead time is how far from the forecast initiation time the data is. +# values="initiation", "validity", "lead" +# value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +# compulsory=true +# trigger=template variables=m??_data_period: "validity", "lead"; +# sort-key=b1 + +# [template variables=m??_data_period] +# ns=Models and Cases/Model ?? +# title=Data period +# description=The period of the underlying data +# help=The period of the input data in each file as an ISO 8601 duration. This +# is used to fetch files at regular validity or lead times. +# E.g. for hourly data: PT1H +# type=quoted +# compulsory=true +# sort-key=b2 + +# [template variables=m??_preprocessing] +# ns=Models and Cases/Model ?? +# title=Preprocess +# description=Preprocess all of the model data. +# help=Whether to preprocess all of the model data. This is useful for applying +# any necessary transformations to the data before it is used in the workflow, +# such as removing boundary regions. +# type=python_boolean +# compulsory=true +# trigger=template variables=m??_preprocessing_recipe: True; +# sort-key=c1 + +# [template variables=m??_preprocessing_recipe] +# ns=Models and Cases/Model ?? +# title=Preprocessing recipe +# description=The preprocessing recipe to use. NOTE: Not yet implemented. +# help=The preprocessing recipe to use. +# type=quoted +# compulsory=true +# sort-key=c2 + + +# Model 01 +[template variables=m01_data_source] +ns=Models and Cases/Model 01 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. 
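    For example, forecast files already on disk (or a mounted network drive)
    would use "filesystem", while "mass" retrieves them from the Met Office
    tape archive instead.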
+values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m01_data_path] +ns=Models and Cases/Model 01 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org type=quoted compulsory=true +sort-key=a2 -[template variables=CSET_INCREMENTAL_OUTPUT] -ns=Data and Cycling -description=Write partial output before cycle end. -help=Whether to write partial output plots before the end of the workflow. - Enabling this will slow the workflow, but allows for inspecting output before - the workflow is complete. +[template variables=m01_date_type] +ns=Models and Cases/Model 01 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. - Only recommended for long running (multi-hour+) workflows. + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. +values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m01_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m01_data_period] +ns=Models and Cases/Model 01 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m01_preprocessing] +ns=Models and Cases/Model 01 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. type=python_boolean -trigger=template variables=CSET_INCREMENTAL_OUTPUT_PERIOD: True; compulsory=true -sort-key=incr_out1 +trigger=template variables=m01_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m01_preprocessing_recipe] +ns=Models and Cases/Model 01 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 -[template variables=CSET_INCREMENTAL_OUTPUT_PERIOD] -ns=Data and Cycling -description=Period of updating output plots. -help=ISO 8601 duration for the length of time between updating the output plots. 
- Smaller values will frequently update the output plots, but use more compute. - To avoid races, the total length of the cycles must be an integer multiple - of the CSET_CYCLE_PERIOD. +# Model 02 +[template variables=m02_data_source] +ns=Models and Cases/Model 02 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m02_data_path] +ns=Models and Cases/Model 02 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m02_date_type] +ns=Models and Cases/Model 02 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. +values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m02_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m02_data_period] +ns=Models and Cases/Model 02 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H type=quoted compulsory=true -sort-key=incr_out2 +sort-key=b2 -# Input data settings. -[template variables=FETCH_FCST_OPT_CONF] -ns=Data and Cycling +[template variables=m02_preprocessing] +ns=Models and Cases/Model 02 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m02_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m02_preprocessing_recipe] +ns=Models and Cases/Model 02 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. 
+type=quoted +compulsory=true +sort-key=c2 + + +# Model 03 +[template variables=m03_data_source] +ns=Models and Cases/Model 03 +title=Data source description=From whence to retrieve the forecast. help=System from which to retrieve the forecast data. Filesystem is generic, while the others are site-specific. values="filesystem", "mass" value-titles=Filesystem, MASS compulsory=true -sort-key=input1 +sort-key=a1 -[template variables=CSET_INPUT_FILE_PATH] -ns=Data and Cycling +[template variables=m03_data_path] +ns=Models and Cases/Model 03 +title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage - system. Can contain wildcards. No quotation marks required. -type=quoted -compulsory=true -sort-key=input2 - -[template variables=CSET_INCREMENTAL_DATA_FETCH] -ns=Data and Cycling -description=Collect only the needed input files for each time step. -help=Whether to collect input data only when it is needed. This requires some - additional setup, so is only recommended when your input data is large - relative to your disk space. -type=python_boolean -trigger=template variables=CSET_FILE_NAME_METADATA_PATTERN: True; - template variables=CSET_TIMES_PER_FILE: True; - template variables=CSET_FILE_TIME_OFFSET: True; -compulsory=true -sort-key=incr_in1 - -[template variables=CSET_FILE_NAME_METADATA_PATTERN] -ns=Data and Cycling -description=Template string identifying the metadata within the filename. -help=The pattern format is the filename with a number of placeholders added to - mark where the time information is. You must have enough information to - get the validity time, either directly from the validity time, or derived - from the initiation time and lead time. Placeholders marked with † are optional. - - Validity time placeholders: - * ``{valid_year}`` - * ``{valid_month}`` - * ``{valid_word_month}`` - * ``{valid_day}`` - * ``{valid_hour}``† - * ``{valid_minute}``† - - Initiation time placeholders: - * ``{init_year}`` - * ``{init_month}`` Numeric month, e.g: 02 - * ``{init_word_month}`` Wordy month, e.g: feb - * ``{init_day}`` - * ``{init_hour}``† - * ``{init_minute}``† - * ``{lead_hour}`` -type=quoted -compulsory=true -sort-key=incr_in2 - -[template variables=CSET_TIMES_PER_FILE] -ns=Data and Cycling -description=Number of cycle periods in each file. -help=The number of cycle periods (as defined by CSET_CYCLE_PERIOD) within each input - file. Normally this will be the number of timesteps per file. -type=integer + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted compulsory=true -sort-key=incr_in2 +sort-key=a2 -[template variables=CSET_FILE_TIME_OFFSET] -ns=Data and Cycling -description=Offset between filename time and data time. -help=Indicates the offset in time periods between the marked validity time - and the earliest time in the file. 
+[template variables=m03_date_type] +ns=Models and Cases/Model 03 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. - E.g. if the filename time was 06:00, then +2 would mean the first contained - time was 04:00, while -2 would mean the first time was 08:00. -type=integer + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. +values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m03_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m03_data_period] +ns=Models and Cases/Model 03 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m03_preprocessing] +ns=Models and Cases/Model 03 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m03_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m03_preprocessing_recipe] +ns=Models and Cases/Model 03 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + + +# Model 04 +[template variables=m04_data_source] +ns=Models and Cases/Model 04 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m04_data_path] +ns=Models and Cases/Model 04 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m04_date_type] +ns=Models and Cases/Model 04 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. 
Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. +values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m04_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m04_data_period] +ns=Models and Cases/Model 04 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m04_preprocessing] +ns=Models and Cases/Model 04 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m04_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m04_preprocessing_recipe] +ns=Models and Cases/Model 04 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted compulsory=true -sort-key=incr_in2 +sort-key=c2 + + +# Model 05 +[template variables=m05_data_source] +ns=Models and Cases/Model 05 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m05_data_path] +ns=Models and Cases/Model 05 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m05_date_type] +ns=Models and Cases/Model 05 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. 
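    A worked example, with illustrative times: data valid at 2024-01-01T06:00Z
    from a forecast started at 2024-01-01T00:00Z has an initiation time of
    2024-01-01T00:00Z, a validity time of 2024-01-01T06:00Z, and a lead time
    of PT6H (the difference between the two).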
+ + Forecast lead time is how far from the forecast initiation time the data is. +values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m05_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m05_data_period] +ns=Models and Cases/Model 05 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m05_preprocessing] +ns=Models and Cases/Model 05 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m05_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m05_preprocessing_recipe] +ns=Models and Cases/Model 05 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + + +# Model 06 +[template variables=m06_data_source] +ns=Models and Cases/Model 06 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m06_data_path] +ns=Models and Cases/Model 06 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m06_date_type] +ns=Models and Cases/Model 06 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. 
+values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m06_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m06_data_period] +ns=Models and Cases/Model 06 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m06_preprocessing] +ns=Models and Cases/Model 06 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m06_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m06_preprocessing_recipe] +ns=Models and Cases/Model 06 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + + +# Model 07 +[template variables=m07_data_source] +ns=Models and Cases/Model 07 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m07_data_path] +ns=Models and Cases/Model 07 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m07_date_type] +ns=Models and Cases/Model 07 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. 
+values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m07_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m07_data_period] +ns=Models and Cases/Model 07 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m07_preprocessing] +ns=Models and Cases/Model 07 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m07_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m07_preprocessing_recipe] +ns=Models and Cases/Model 07 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + + +# Model 08 +[template variables=m08_data_source] +ns=Models and Cases/Model 08 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m08_data_path] +ns=Models and Cases/Model 08 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m08_date_type] +ns=Models and Cases/Model 08 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. 
+values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m08_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m08_data_period] +ns=Models and Cases/Model 08 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m08_preprocessing] +ns=Models and Cases/Model 08 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m08_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m08_preprocessing_recipe] +ns=Models and Cases/Model 08 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + + +# Model 09 +[template variables=m09_data_source] +ns=Models and Cases/Model 09 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m09_data_path] +ns=Models and Cases/Model 09 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m09_date_type] +ns=Models and Cases/Model 09 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. 
+values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m09_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m09_data_period] +ns=Models and Cases/Model 09 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m09_preprocessing] +ns=Models and Cases/Model 09 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m09_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m09_preprocessing_recipe] +ns=Models and Cases/Model 09 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + + +# Model 10 +[template variables=m10_data_source] +ns=Models and Cases/Model 10 +title=Data source +description=From whence to retrieve the forecast. +help=System from which to retrieve the forecast data. Filesystem is generic, + while the others are site-specific. +values="filesystem", "mass" +value-titles=Filesystem, MASS +compulsory=true +sort-key=a1 + +[template variables=m10_data_path] +ns=Models and Cases/Model 10 +title=Data path +description=The path to the forecast. +help=Full path (including file name) to the forecast data on your chosen storage + system. Can contain wildcards. No quotation marks required in rose edit, as + it is already quoted there. + + strftime format strings are supported, and will be replaced with the + desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` + + For forecast lead time, %N can be used which is replaced by the 3 digit + lead time in hours. The regular placeholders are still usable, and refer + to the initiation time E.g: `/data/case_%Y%m%d/model1_%N.nc` + + Commonly useful placeholders: + %Y: Year, e.g: 2024 + %m: Month, e.g: 12 + %d: Day, e.g: 31 + %H: Hour, e.g: 23 + %M: Minute, e.g: 59 + %N: Forecast lead time in hours, e.g: 012 + + For more on strftime format strings, see: https://strftime.org +type=quoted +compulsory=true +sort-key=a2 + +[template variables=m10_date_type] +ns=Models and Cases/Model 10 +title=Date placeholder type +description=Type of date templated into the data path. +help=The type of date templated into the data path. Affects interpretation of + strftime placeholders in the data path. + + Forecast initiation time, AKA data time, is the time of the validity time of + the first data point. For realtime forecasts this is approximately when the + forecast was started. + + Validity time is when the data is predicting for. + + Forecast lead time is how far from the forecast initiation time the data is. 
+values="initiation", "validity", "lead" +value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time +compulsory=true +trigger=template variables=m10_data_period: "validity", "lead"; +sort-key=b1 + +[template variables=m10_data_period] +ns=Models and Cases/Model 10 +title=Data period +description=The period of the underlying data +help=The period of the input data in each file as an ISO 8601 duration. This + is used to fetch files at regular validity or lead times. + E.g. for hourly data: PT1H +type=quoted +compulsory=true +sort-key=b2 + +[template variables=m10_preprocessing] +ns=Models and Cases/Model 10 +title=Preprocess +description=Preprocess all of the model data. +help=Whether to preprocess all of the model data. This is useful for applying + any necessary transformations to the data before it is used in the workflow, + such as removing boundary regions. +type=python_boolean +compulsory=true +trigger=template variables=m10_preprocessing_recipe: True; +sort-key=c1 + +[template variables=m10_preprocessing_recipe] +ns=Models and Cases/Model 10 +title=Preprocessing recipe +description=The preprocessing recipe to use. NOTE: Not yet implemented. +help=The preprocessing recipe to use. +type=quoted +compulsory=true +sort-key=c2 + ################################################################################ # Diagnostics ################################################################################ +[template variables=SURFACE_MODEL_FIELDS] +ns=Diagnostics/Quicklook +title=Surface model fields +description=Per model field names. +help=Variable names for surface variables. The names across a row should match + the same physical phenomenon, and use the appropriate standard, long, or + field name, or the STASH code for each model. Blank entries indicate that a + model does not have that phenomenon, so it will be skipped. + + Ignore the boxes for models that are not enabled. +compulsory=true +element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 +type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted +length=: +sort-key=surface1 + +[template variables=PLOT_SPATIAL_SURFACE_MODEL_FIELD] +ns=Diagnostics/Quicklook +description=Create plots for the specified surface fields. +help=See includes/plot_spatial_surface_model_field.cylc +type=python_boolean +compulsory=true +sort-key=surface2 + +[template variables=LFRIC_PLOT_SPATIAL_SURFACE_MODEL_FIELD] +ns=Diagnostics/Quicklook +description=Create plots for the specified surface fields for structured LFRic data. +help=See includes/lfric_plot_spatial_surface_model_field.cylc +type=python_boolean +compulsory=true +sort-key=surface2 + +[template variables=DOMAIN_MEAN_SURFACE_TIME_SERIES] +ns=Diagnostics/Quicklook +description=Create time series plot of surface field domain mean. +help=See includes/deterministic_domain_mean_surface_time_series.cylc +type=python_boolean +compulsory=true +sort-key=surface3 + +[template variables=LFRIC_DOMAIN_MEAN_SURFACE_TIME_SERIES] +ns=Diagnostics/Quicklook +description=Create time series plot of surface field domain mean for structured LFRic data. +help=See includes/lfric_deterministic_domain_mean_surface_time_series.cylc +type=python_boolean +compulsory=true +sort-key=surface3 + [template variables=METPLUS_POINT_STAT] -ns=Diagnostics +ns=Diagnostics/Verification description=Run METplus point stat in the workflow. help=If True, it will enable the production of verification statistics against point observations (i.e. 
land-synop, sonde,...). @@ -365,7 +1502,7 @@ compulsory=true sort-key=met1 [template variables=METPLUS_GRID_STAT] -ns=Diagnostics +ns=Diagnostics/Verification description=Run METplus grid stat in the workflow. help=If True, it will enable the production of verification statistics against gridded data (i.e. radar, analysis,...). @@ -376,7 +1513,7 @@ compulsory=true sort-key=met2 [template variables=METPLUS_OPT_CONFIG_KEYS] -ns=Diagnostics +ns=Diagnostics/Verification description=Which METplus configuration to run. help=Selects the site/model specific configuration to use. The configuration must exist as restricted files in @@ -386,19 +1523,19 @@ value-hints="metoffice", "niwa" sort-key=met3 [template variables=METPLUS_OBS_DIR] -ns=Diagnostics +ns=Diagnostics/Verification description=Path to directory containing observations in MET ASCII format. type=quoted sort-key=met4 [template variables=METPLUS_ANA_DIR] -ns=Diagnostics +ns=Diagnostics/Verification description=Path to directory containing analysis in MET netcdf format. type=quoted sort-key=met5 [template variables=DETERMINISTIC_PLOT_CAPE_RATIO] -ns=Diagnostics +ns=Diagnostics/Convection description=Extracts data required for, and calculates the CAPE ratio diagnostic, plotting on a map. Required STASH m01s20i114, m01s20i112, m01s20i113. help=See includes/deterministic_plot_cape_ratio.cylc @@ -465,21 +1602,6 @@ type=quoted compulsory=true sort-key=cs5 -[template variables=SURFACE_MODEL_FIELDS] -ns=Diagnostics -description=List of standard names of model fields to plot. -type=python_list -compulsory=true -sort-key=surface1 - -[template variables=PLOT_SPATIAL_SURFACE_MODEL_FIELD] -ns=Diagnostics -description=Create plots for the specified surface fields. -help=See includes/plot_spatial_surface_model_field.cylc -type=python_boolean -compulsory=true -sort-key=surface2 - [template variables=DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD] ns=Diagnostics description=Create a series of histogram plots for selected surface fields for each cycle time. @@ -488,30 +1610,6 @@ type=python_boolean compulsory=true sort-key=surface2 -[template variables=LFRIC_PLOT_SPATIAL_SURFACE_MODEL_FIELD] -ns=Diagnostics -description=Create plots for the specified surface fields for structured LFRic data. -help=See includes/lfric_plot_spatial_surface_model_field.cylc -type=python_boolean -compulsory=true -sort-key=surface2 - -[template variables=DOMAIN_MEAN_SURFACE_TIME_SERIES] -ns=Diagnostics -description=Create time series plot of surface field domain mean. -help=See includes/deterministic_domain_mean_surface_time_series.cylc -type=python_boolean -compulsory=true -sort-key=surface3 - -[template variables=LFRIC_DOMAIN_MEAN_SURFACE_TIME_SERIES] -ns=Diagnostics -description=Create time series plot of surface field domain mean for structured LFRic data. -help=See includes/lfric_deterministic_domain_mean_surface_time_series.cylc -type=python_boolean -compulsory=true -sort-key=surface3 - [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_SERIES] ns=Diagnostics description=Domain averaged vertical profile for each validity time. @@ -756,7 +1854,7 @@ sort-key=subsection1 [template variables=SUBAREA_LAT_BOUND_TOP] ns=Diagnostics description=Top edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid's native units. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. 
type=real sort-key=subsection2 compulsory=true @@ -764,7 +1862,7 @@ compulsory=true [template variables=SUBAREA_LAT_BOUND_BOTTOM] ns=Diagnostics description=Bottom edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid's native units. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. type=real sort-key=subsection2 compulsory=true @@ -772,7 +1870,7 @@ compulsory=true [template variables=SUBAREA_LON_BOUND_LEFT] ns=Diagnostics description=Left edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid's native units. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. type=real sort-key=subsection2 compulsory=true @@ -780,13 +1878,13 @@ compulsory=true [template variables=SUBAREA_LON_BOUND_RIGHT] ns=Diagnostics description=Right edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid's native units. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. type=real sort-key=subsection2 compulsory=true [template variables=DETERMINISTIC_PLOT_INFLOW_PROPERTIES] -ns=Diagnostics +ns=Diagnostics/Convection description=Extracts data required for, and calculates the inflow properties diagnostic, plotting on a map. Required STASH m01s20i119, m01s00i025, m01s00i033. help=See includes/deterministic_plot_inflow_properties.cylc From c987f80aa319c696184617c18aa230c5fed07a11 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 9 Aug 2024 11:58:20 +0100 Subject: [PATCH 02/90] Add model name to rose edit conf --- cset-workflow/meta/rose-meta.conf | 133 +++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 96e77de43..3e8a3228f 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -340,13 +340,15 @@ range=1:10 compulsory=true sort-key=e1 # Need to link to all of the model settings here. 
-trigger=template variables=m01_data_source: this >= 1; +trigger=template variables=m01_name: this >= 1; + template variables=m01_data_source: this >= 1; template variables=m01_data_path: this >= 1; template variables=m01_date_type: this >= 1; template variables=m01_data_period: this >= 1; template variables=m01_preprocessing: this >= 1; template variables=m01_preprocessing_recipe: this >= 1; + template variables=m02_name: this >= 2; template variables=m02_data_source: this >= 2; template variables=m02_data_path: this >= 2; template variables=m02_date_type: this >= 2; @@ -354,6 +356,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m02_preprocessing: this >= 2; template variables=m02_preprocessing_recipe: this >= 2; + template variables=m03_name: this >= 3; template variables=m03_data_source: this >= 3; template variables=m03_data_path: this >= 3; template variables=m03_date_type: this >= 3; @@ -361,6 +364,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m03_preprocessing: this >= 3; template variables=m03_preprocessing_recipe: this >= 3; + template variables=m04_name: this >= 4; template variables=m04_data_source: this >= 4; template variables=m04_data_path: this >= 4; template variables=m04_date_type: this >= 4; @@ -368,6 +372,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m04_preprocessing: this >= 4; template variables=m04_preprocessing_recipe: this >= 4; + template variables=m05_name: this >= 5; template variables=m05_data_source: this >= 5; template variables=m05_data_path: this >= 5; template variables=m05_date_type: this >= 5; @@ -375,6 +380,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m05_preprocessing: this >= 5; template variables=m05_preprocessing_recipe: this >= 5; + template variables=m06_name: this >= 6; template variables=m06_data_source: this >= 6; template variables=m06_data_path: this >= 6; template variables=m06_date_type: this >= 6; @@ -382,6 +388,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m06_preprocessing: this >= 6; template variables=m06_preprocessing_recipe: this >= 6; + template variables=m07_name: this >= 7; template variables=m07_data_source: this >= 7; template variables=m07_data_path: this >= 7; template variables=m07_date_type: this >= 7; @@ -389,6 +396,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m07_preprocessing: this >= 7; template variables=m07_preprocessing_recipe: this >= 7; + template variables=m08_name: this >= 8; template variables=m08_data_source: this >= 8; template variables=m08_data_path: this >= 8; template variables=m08_date_type: this >= 8; @@ -396,6 +404,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m08_preprocessing: this >= 8; template variables=m08_preprocessing_recipe: this >= 8; + template variables=m09_name: this >= 9; template variables=m09_data_source: this >= 9; template variables=m09_data_path: this >= 9; template variables=m09_date_type: this >= 9; @@ -403,6 +412,7 @@ trigger=template variables=m01_data_source: this >= 1; template variables=m09_preprocessing: this >= 9; template variables=m09_preprocessing_recipe: this >= 9; + template variables=m10_name: this >= 10; template variables=m10_data_source: this >= 10; template variables=m10_data_path: this >= 10; template variables=m10_date_type: this >= 10; @@ -415,6 +425,17 @@ trigger=template variables=m01_data_source: this >= 1; # Model definitions. 
Repeat section for each desired model. ################################################################################ +# [template variables=m??_name] +# ns=Models and Cases/Model ?? +# title=Model name +# description=A friendly name for the model. +# help=A recognisable name for this particular model. This is carried through to +# the output webpage, and identifies the model. As it is only used for +# display purposes it can be any string. +# type=quoted +# compulsory=true +# sort-key=a0 + # # Model ?? # [template variables=m??_data_source] # ns=Models and Cases/Model ?? @@ -509,6 +530,17 @@ trigger=template variables=m01_data_source: this >= 1; # Model 01 +[template variables=m01_name] +ns=Models and Cases/Model 01 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m01_data_source] ns=Models and Cases/Model 01 title=Data source @@ -602,6 +634,17 @@ sort-key=c2 # Model 02 +[template variables=m02_name] +ns=Models and Cases/Model 02 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m02_data_source] ns=Models and Cases/Model 02 title=Data source @@ -695,6 +738,17 @@ sort-key=c2 # Model 03 +[template variables=m03_name] +ns=Models and Cases/Model 03 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m03_data_source] ns=Models and Cases/Model 03 title=Data source @@ -788,6 +842,17 @@ sort-key=c2 # Model 04 +[template variables=m04_name] +ns=Models and Cases/Model 04 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m04_data_source] ns=Models and Cases/Model 04 title=Data source @@ -881,6 +946,17 @@ sort-key=c2 # Model 05 +[template variables=m05_name] +ns=Models and Cases/Model 05 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m05_data_source] ns=Models and Cases/Model 05 title=Data source @@ -974,6 +1050,17 @@ sort-key=c2 # Model 06 +[template variables=m06_name] +ns=Models and Cases/Model 06 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. 
+type=quoted +compulsory=true +sort-key=a0 + [template variables=m06_data_source] ns=Models and Cases/Model 06 title=Data source @@ -1067,6 +1154,17 @@ sort-key=c2 # Model 07 +[template variables=m07_name] +ns=Models and Cases/Model 07 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m07_data_source] ns=Models and Cases/Model 07 title=Data source @@ -1160,6 +1258,17 @@ sort-key=c2 # Model 08 +[template variables=m08_name] +ns=Models and Cases/Model 08 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m08_data_source] ns=Models and Cases/Model 08 title=Data source @@ -1253,6 +1362,17 @@ sort-key=c2 # Model 09 +[template variables=m09_name] +ns=Models and Cases/Model 09 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m09_data_source] ns=Models and Cases/Model 09 title=Data source @@ -1346,6 +1466,17 @@ sort-key=c2 # Model 10 +[template variables=m10_name] +ns=Models and Cases/Model 10 +title=Model name +description=A friendly name for the model. +help=A recognisable name for this particular model. This is carried through to + the output webpage, and identifies the model. As it is only used for + display purposes it can be any string. +type=quoted +compulsory=true +sort-key=a0 + [template variables=m10_data_source] ns=Models and Cases/Model 10 title=Data source From 302b1f8b09befb47de920b6bfc4c5954869f220c Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 2 Aug 2024 12:02:45 +0100 Subject: [PATCH 03/90] Change how data is fetched to work on data time Also now use subclasses for getting individual files from the various data sources. --- .../fetch_fcst/bin/fetch-data-filesystem.py | 6 +- .../app/fetch_fcst/bin/fetch-data-http.py | 33 ++++ .../app/fetch_fcst/opt/rose-app-http.conf | 2 + src/CSET/_workflow_utils/fetch_data.py | 160 ++++++++++++++++++ .../_workflow_utils/fetch_data_filesystem.py | 30 ++++ 5 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 cset-workflow/app/fetch_fcst/bin/fetch-data-http.py create mode 100644 cset-workflow/app/fetch_fcst/opt/rose-app-http.conf create mode 100755 src/CSET/_workflow_utils/fetch_data.py diff --git a/cset-workflow/app/fetch_fcst/bin/fetch-data-filesystem.py b/cset-workflow/app/fetch_fcst/bin/fetch-data-filesystem.py index d415b9170..e48f7dd47 100755 --- a/cset-workflow/app/fetch_fcst/bin/fetch-data-filesystem.py +++ b/cset-workflow/app/fetch_fcst/bin/fetch-data-filesystem.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python3

-"""Retrieve the files from the filesystem for the current cycle point."""
+"""Retrieve files from the filesystem."""

-import CSET._workflow_utils.fetch_data_filesystem
+from CSET._workflow_utils.fetch_data import FilesystemFileRetriever, fetch_data

-CSET._workflow_utils.fetch_data_filesystem.run()
+fetch_data(FilesystemFileRetriever)
diff --git a/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py
new file mode 100644
index 000000000..4b67d0451
--- /dev/null
+++ b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py
@@ -0,0 +1,40 @@
+#! /usr/bin/env python3
+
+"""Retrieve files via HTTP."""
+
+import logging
+import os
+import ssl
+import urllib.parse
+import urllib.request
+
+from CSET._workflow_utils.fetch_data import FileRetriever, fetch_data
+
+
+class HTTPFileRetriever(FileRetriever):
+    """Retrieve files via HTTP."""
+
+    def get_file(self, file_path: str, output_dir: str) -> None:
+        """Save a file from an HTTP address to the output directory.
+
+        Parameters
+        ----------
+        file_path: str
+            URL of the file to fetch. Unlike the other retrievers, glob
+            patterns are not expanded.
+        output_dir: str
+            Path to filesystem directory into which the file should be copied.
+        """
+        ctx = ssl.create_default_context()
+        # Save under the final component of the URL path, inside output_dir.
+        filename = urllib.parse.urlparse(file_path).path.split("/")[-1]
+        save_path = os.path.join(output_dir, filename)
+        try:
+            with urllib.request.urlopen(file_path, context=ctx) as response:
+                with open(save_path, "wb") as fp:
+                    fp.write(response.read())
+        except OSError as err:
+            logging.warning("Failed to retrieve %s, error: %s", file_path, err)
+
+
+fetch_data(HTTPFileRetriever)
diff --git a/cset-workflow/app/fetch_fcst/opt/rose-app-http.conf b/cset-workflow/app/fetch_fcst/opt/rose-app-http.conf
new file mode 100644
index 000000000..5a84f0c97
--- /dev/null
+++ b/cset-workflow/app/fetch_fcst/opt/rose-app-http.conf
@@ -0,0 +1,2 @@
+[command]
+default=app_env_wrapper fetch-data-http.py
diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py
new file mode 100755
index 000000000..c4110fc42
--- /dev/null
+++ b/src/CSET/_workflow_utils/fetch_data.py
@@ -0,0 +1,162 @@
+#! /usr/bin/env python3
+
+"""Retrieve files from a data source for the current cycle point."""
+
+import abc
+import glob
+import logging
+import os
+import shutil
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime, timedelta
+
+import isodate
+
+logging.basicConfig(
+    level=os.getenv("LOGLEVEL", "INFO"), format="%(asctime)s %(levelname)s %(message)s"
+)
+
+
+class FileRetriever(abc.ABC):
+    """Abstract class for retrieving files from a data source.
+
+    The `get_file` method must be defined. Optionally the __enter__ and __exit__
+    methods may be overridden to add setup or cleanup code.
+
+    The class is designed to be used as a context manager, so that resources can
+    be cleaned up after the retrieval is complete. All the files of a model are
+    retrieved within a single context manager block, within which the `get_file`
+    method is called for each file path.
+    """
+
+    def __enter__(self) -> "FileRetriever":
+        """Initialise the file retriever."""
+        logging.debug("Initialising FileRetriever.")
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Clean up the file retriever."""
+        logging.debug("Tearing down FileRetriever.")
+
+    @abc.abstractmethod
+    def get_file(self, file_path: str, output_dir: str) -> None:
+        """Save a file from the data source to the output directory.
+
+        Not all of the given paths will exist, so FileNotFoundErrors should be
+        logged, but not raised.
+
+        Implementations should be thread safe, as the method is called from
+        multiple threads.
+
+        Parameters
+        ----------
+        file_path: str
+            Path of the file to copy on the data source. It may contain patterns
+            like globs, which will be expanded in a system specific manner.
+        output_dir: str
+            Path to filesystem directory into which the file should be copied.
+        """
+        raise NotImplementedError
+
+
+class FilesystemFileRetriever(FileRetriever):
+    """Retrieve files from the filesystem."""
+
+    def get_file(self, file_path: str, output_dir: str) -> None:
+        """Save a file from the filesystem to the output directory.
+
+        Parameters
+        ----------
+        file_path: str
+            Path of the file to copy on the filesystem. It may contain patterns
+            like globs, which will be expanded in a system specific manner.
+        output_dir: str
+            Path to filesystem directory into which the file should be copied.
+        """
+        file_paths = glob.glob(os.path.expanduser(file_path))
+        logging.debug("Copying files:\n%s", "\n".join(file_paths))
+        for file in file_paths:
+            try:
+                shutil.copy(file, output_dir)
+            except OSError as err:
+                logging.warning("Failed to copy %s, error: %s", file, err)
+
+
+def _template_file_path():
+    """Fill time placeholders to generate the file paths to fetch."""
+    raw_path = os.environ["DATA_PATH"]
+    date_type = os.environ["DATE_TYPE"]
+    data_time = datetime.fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"])
+    forecast_length = isodate.parse_duration(os.environ["CSET_ANALYSIS_PERIOD"])
+    forecast_offset = isodate.parse_duration(os.environ["CSET_ANALYSIS_OFFSET"])
+
+    placeholder_times: list[datetime] = []
+    lead_times: list[timedelta] = []
+    match date_type:
+        case "validity":
+            date = data_time
+            data_period = isodate.parse_duration(os.getenv("DATA_PERIOD"))
+            while date < data_time + forecast_length:
+                placeholder_times.append(date)
+                date += data_period
+        case "initiation":
+            placeholder_times.append(data_time)
+        case "lead":
+            placeholder_times.append(data_time)
+            data_period = isodate.parse_duration(os.getenv("DATA_PERIOD"))
+            lead_time = forecast_offset
+            while lead_time < forecast_length:
+                lead_times.append(lead_time)
+                lead_time += data_period
+        case _:
+            raise ValueError(f"Invalid date type: {date_type}")
+
+    paths: list[str] = []
+    for placeholder_time in placeholder_times:
+        # Expand out all other format strings.
+        path = placeholder_time.strftime(os.path.expandvars(raw_path))
+
+        # Expand out lead time format strings, %N. When there are no lead
+        # times the path is used as-is.
+        if lead_times:
+            for lead_time in lead_times:
+                # BUG: Will not respect escaped % signs, e.g. %%N.
+                paths.append(
+                    path.replace("%N", f"{int(lead_time.total_seconds()) // 3600:03d}")
+                )
+        else:
+            paths.append(path)
+    return paths
+
+
+def fetch_data(file_retriever: type[FileRetriever] = FilesystemFileRetriever):
+    """Fetch the model's data.
+
+    The following environment variables need to be set:
+    * CSET_ANALYSIS_OFFSET
+    * CSET_ANALYSIS_PERIOD
+    * CYLC_TASK_CYCLE_POINT
+    * DATA_PATH
+    * DATA_PERIOD - If DATE_TYPE is not 'initiation'
+    * DATE_TYPE
+    * MODEL_NUMBER
+
+    Parameters
+    ----------
+    file_retriever: type[FileRetriever]
+        FileRetriever subclass to use. Defaults to FilesystemFileRetriever.
+    """
+    # Prepare output directory.
+    model_number = os.getenv("MODEL_NUMBER")
+    cycle_share_data_dir = f"{os.getenv('CYLC_WORKFLOW_SHARE_DIR')}/cycle/{os.getenv('CYLC_TASK_CYCLE_POINT')}/data/{model_number}"
+    os.makedirs(cycle_share_data_dir, exist_ok=True)
+    logging.debug("Output directory: %s", cycle_share_data_dir)
+
+    # Get file paths.
+    paths = _template_file_path()
+    logging.info("Retrieving paths:\n%s", "\n".join(paths))
+
+    # Use file retriever to transfer data with multiple threads.
+    with file_retriever() as retriever, ThreadPoolExecutor() as executor:
+        for path in paths:
+            executor.submit(retriever.get_file, path, cycle_share_data_dir)
diff --git a/src/CSET/_workflow_utils/fetch_data_filesystem.py b/src/CSET/_workflow_utils/fetch_data_filesystem.py
index bd0089480..d50eb0dc0 100755
--- a/src/CSET/_workflow_utils/fetch_data_filesystem.py
+++ b/src/CSET/_workflow_utils/fetch_data_filesystem.py
@@ -6,6 +6,9 @@
import logging
import os
import shutil
+from datetime import datetime
+
+import isodate

from CSET._workflow_utils import validity_time_tester

@@ -13,6 +16,33 @@
    level=os.getenv("LOGLEVEL", "INFO"), format="%(asctime)s %(levelname)s %(message)s"
)

+# os.getenv("DATA_SOURCE")
+
+
+def template_file_path():
+    """Fill time placeholders to generate a file path to fetch."""
+    raw_path = os.environ["DATA_PATH"]
+    date_type = os.environ["DATE_TYPE"]
+    data_period = isodate.parse_duration(os.getenv("DATA_PERIOD"))
+    data_time = datetime.fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"])
+    forecast_length = isodate.parse_duration(os.environ["CSET_VERIFICATION_PERIOD"])
+    forecast_offset = isodate.parse_duration(os.environ["CSET_VERIFICATION_OFFSET"])
+
+    placeholder_times = set()
+    match date_type:
+        case "validity":
+            date = data_time
+            while date < data_time + forecast_length:
+                placeholder_times.add(date)
+                date += data_period
+        case "initiation":
+            placeholder_times.add(data_time)
+        case "lead":
+            # TODO: Figure out how we are doing lead time.
+            pass
+        case _:
+            raise ValueError(f"Invalid date type: {date_type}")
+
# Excluded from coverage temporarily as the script has to be rewritten when data time
From d8f3042f001998ab7370fad3d7f5eec9e67c2c6e Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 5 Aug 2024 16:21:39 +0100 Subject: [PATCH 04/90] Modify data fetching in workflow to fetch per model Grab rose template variables directly, rather than via jinja context Add workflow exported functions to __all__ Add licence header to jinja_utils Require success for processing tasks in workflow Housekeep only on success, but finish workflow regardless --- .gitignore | 2 - cset-workflow/Jinja2Globals/glob.py | 3 - cset-workflow/Jinja2Globals/zip.py | 3 - cset-workflow/flow.cylc | 50 +++++--- ...deterministic_domain_histogram_series.cylc | 2 +- .../plot_spatial_surface_model_field.cylc | 9 +- cset-workflow/lib/python/jinja_utils.py | 114 ++++++++++++++++++ 7 files changed, 153 insertions(+), 30 deletions(-) delete mode 100644 cset-workflow/Jinja2Globals/glob.py delete mode 100644 cset-workflow/Jinja2Globals/zip.py create mode 100644 cset-workflow/lib/python/jinja_utils.py diff --git a/.gitignore b/.gitignore index 4f5605398..f31f3d279 100644 --- a/.gitignore +++ b/.gitignore @@ -14,8 +14,6 @@ dist/ downloads/ eggs/ .eggs/ -lib/ -lib64/ parts/ sdist/ var/ diff --git a/cset-workflow/Jinja2Globals/glob.py b/cset-workflow/Jinja2Globals/glob.py deleted file mode 100644 index 4bb318687..000000000 --- a/cset-workflow/Jinja2Globals/glob.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Make glob function available to jinja.""" - -from glob import glob # noqa: F401 diff --git a/cset-workflow/Jinja2Globals/zip.py b/cset-workflow/Jinja2Globals/zip.py deleted file mode 100644 index 9fbfad0ba..000000000 --- a/cset-workflow/Jinja2Globals/zip.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Make zip function available to jinja.""" - -from builtins import zip # noqa: F401 diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index 271f2f135..00a74bc96 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -4,9 +4,13 @@ title = CSET description = Workflow for running CSET. URL = https://metoffice.github.io/CSET +{% from "jinja_utils" import get_models, glob, max, min, zip, restructure_field_list, sanitise_task_name %} +{% set models = get_models(ROSE_SUITE_VARIABLES) %} + [scheduling] runahead limit = P{{CSET_RUNAHEAD_LIMIT}} - initial cycle point = 1000-01-01 + initial cycle point = {{ min(CSET_CASE_DATES) }} + final cycle point = {{ max(CSET_CASE_DATES) }} [[graph]] # Only runs on the first cycle. @@ -19,14 +23,14 @@ URL = https://metoffice.github.io/CSET # Runs for every forecast initiation time to process the data in parallel. {% for date in CSET_CASE_DATES %} R1/{{date}} = """ - setup_complete[^] => FETCH_DATA:succeed-all => PROCESS:finish-all => process_finish => housekeeping_raw + setup_complete[^] => FETCH_DATA:succeed-all => fetch_complete => PROCESS:finish-all => housekeeping_raw """ {% endfor %} # Only runs on the final cycle. 
R1/$ = """ - PROCESS:finish-all => finish_website => send_email - PROCESS:finish-all => housekeeping_full + housekeeping_raw => finish_website => send_email + housekeeping_raw => housekeeping_full """ [runtime] @@ -62,17 +66,23 @@ URL = https://metoffice.github.io/CSET [[PROCESS]] script = rose task-run -v --app-key=run_cset_recipe + execution time limit = PT1H [[FETCH_DATA]] + script = rose task-run -v --app-key=fetch_fcst + execution time limit = PT1H + [[[environment]]] + CSET_ANALYSIS_OFFSET = {{CSET_ANALYSIS_OFFSET}} + CSET_ANALYSIS_PERIOD = {{CSET_ANALYSIS_PERIOD}} [[METPLUS]] [[[environment]]] - {% if METPLUS_GRID_STAT %} - METPLUS_ANA_DIR = {{METPLUS_ANA_DIR}} - METPLUS_FCST_DIR = {{METPLUS_FCST_DIR}} - METPLUS_OBS_DIR = {{METPLUS_OBS_DIR}} - ROSE_APP_OPT_CONF_KEYS = {{METPLUS_OPT_CONFIG_KEYS}} - {% endif %} + {% if METPLUS_GRID_STAT %} + METPLUS_ANA_DIR = {{METPLUS_ANA_DIR}} + METPLUS_FCST_DIR = {{METPLUS_FCST_DIR}} + METPLUS_OBS_DIR = {{METPLUS_OBS_DIR}} + ROSE_APP_OPT_CONF_KEYS = {{METPLUS_OPT_CONFIG_KEYS}} + {% endif %} [[DUMMY_TASK]] script = true @@ -82,6 +92,9 @@ URL = https://metoffice.github.io/CSET [[setup_complete]] inherit = DUMMY_TASK + [[fetch_complete]] + inherit = DUMMY_TASK + [[build_conda]] # Create the conda environment if it does not yet exist, possibly installing # CSET from source. @@ -96,18 +109,19 @@ URL = https://metoffice.github.io/CSET [[install_website_skeleton]] # Copies the static files that make up the web interface. - [[fetch_fcst]] + {% for model in models %} + [[fetch_fcst_m{{model["number"]}}]] # Fetch data from disk or a file based archival system. inherit = FETCH_DATA [[[environment]]] - ROSE_APP_OPT_CONF_KEYS = {{FETCH_FCST_OPT_CONF}} - CSET_INPUT_FILE_PATH = {{CSET_INPUT_FILE_PATH}} - {% if CSET_INCREMENTAL_DATA_FETCH %} - CSET_FILE_NAME_METADATA_PATTERN = {{CSET_FILE_NAME_METADATA_PATTERN}} - CSET_CYCLE_PERIOD = {{CSET_CYCLE_PERIOD}} - CSET_TIMES_PER_FILE = {{CSET_TIMES_PER_FILE}} - CSET_FILE_TIME_OFFSET = {{CSET_FILE_TIME_OFFSET}} + MODEL_NUMBER = {{model["number"]}} + ROSE_APP_OPT_CONF_KEYS = {{model["data_source"]}} + DATA_PATH = {{model["data_path"]}} + DATE_TYPE = {{model["date_type"]}} + {% if model["date_type"] != "initiation" %} + DATA_PERIOD = {{model["data_period"]}} {% endif %} + {% endfor %} [[housekeeping_raw]] # Housekeep unprocessed data files. 
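
For orientation, the `models` list driving the per-model fetch tasks above comes from `get_models()` in the new `jinja_utils.py` (added just below). A usage sketch, with illustrative variable values and assuming `cset-workflow/lib/python` is on the module search path:

```python
# Illustrative input/output for get_models(); the m01_/m02_ values are made up.
from jinja_utils import get_models  # Assumes lib/python is importable.

rose_suite_variables = {
    "m01_name": "Control",
    "m01_data_path": "/data/%Y%m%d/ctrl_*.nc",
    "m02_name": "Experiment",
    "m02_data_path": "/data/%Y%m%d/expt_*.nc",
    "WEB_DIR": "~/public_html/CSET",  # Keys without an mNN_ prefix are skipped.
}
print(get_models(rose_suite_variables))
# [{'name': 'Control', 'data_path': '/data/%Y%m%d/ctrl_*.nc', 'number': 1},
#  {'name': 'Experiment', 'data_path': '/data/%Y%m%d/expt_*.nc', 'number': 2}]
```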
diff --git a/cset-workflow/includes/deterministic_domain_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_histogram_series.cylc index 1e511b27f..93db04145 100644 --- a/cset-workflow/includes/deterministic_domain_histogram_series.cylc +++ b/cset-workflow/includes/deterministic_domain_histogram_series.cylc @@ -1,4 +1,4 @@ -{% if DOMAIN_HISTOGRAM_SERIES_FIELD %} +{% if DOMAIN_HISTOGRAM_SERIES %} [runtime] {% for model_field in MODEL_LEVEL_MODEL_FIELDS %} [[pre_process_deterministic_domain_histogram_series_{{model_field}}]] diff --git a/cset-workflow/includes/plot_spatial_surface_model_field.cylc b/cset-workflow/includes/plot_spatial_surface_model_field.cylc index aa4bc11db..f0d69759d 100644 --- a/cset-workflow/includes/plot_spatial_surface_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_surface_model_field.cylc @@ -1,10 +1,13 @@ {% if PLOT_SPATIAL_SURFACE_MODEL_FIELD %} -{% for model_field in SURFACE_MODEL_FIELDS %} +{% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} [runtime] - [[generic_spatial_plot_time_series_{{model_field}}]] + [[generic_spatial_plot_time_series_m{{model_number}}_{{sanitise_task_name(field)}}]] inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}}" + CSET_ADDOPTS = "--VARNAME={{field}}" + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endif %} diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py new file mode 100644 index 000000000..30be43aa6 --- /dev/null +++ b/cset-workflow/lib/python/jinja_utils.py @@ -0,0 +1,114 @@ +# © Crown copyright, Met Office (2022-2024) and CSET contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Useful functions for the workflow.""" + +import itertools +from builtins import max, min, zip +from glob import glob + +# Reexport functions for use within workflow. +__all__ = [ + "get_models", + "restructure_field_list", + "sanitise_task_name", + # Reexported functions. + "max", + "min", + "zip", + "glob", +] + + +def get_models(rose_variables: dict): + """Load per-model configuration into a single object. + + Returns a list of dictionaries, each one containing a per-model + configuration. + """ + models = [] + for model in range(1, 11): + model_prefix = f"m{model:02d}_" + model_vars = { + key.removeprefix(model_prefix): value + for key, value in rose_variables.items() + if key.startswith(model_prefix) + } + if model_vars: + model_vars["number"] = model + models.append(model_vars) + return models + + +def _batched(iterable, n): + """Implement itertools.batched for Python < 3.12. 
+
+    batched('ABCDEFG', 3) → ABC DEF G
+    https://docs.python.org/3/library/itertools.html#itertools.batched
+    """
+    if n < 1:
+        raise ValueError("n must be at least one")
+    iterator = iter(iterable)
+    while batch := tuple(itertools.islice(iterator, n)):
+        yield batch
+
+
+def restructure_field_list(fields: list):
+    """Restructure a flat list of fields into a list of per-model mappings."""
+    # ('m01s03i236', 'temp_at_screen_level', '', '', '', '', '', '', '', '',
+    #  'm01s03i230', 'wind_speed_at_10m', '', '', '', '', '', '', '', '')
+    # -> [{1: "m01s03i236", 2: "temp_at_screen_level"},
+    #     {1: "m01s03i230", 2: "wind_speed_at_10m"}]
+    max_number_of_models = 10
+    assert len(fields) % max_number_of_models == 0
+    # itertools.batched is from python 3.12
+    batched = getattr(itertools, "batched", _batched)
+    all_fields = batched(fields, max_number_of_models)
+    rearranged = [
+        {
+            number: field
+            for number, field in enumerate(equivalent_model_fields, start=1)
+            if field
+        }
+        for equivalent_model_fields in all_fields
+    ]
+    return rearranged
+
+
+def sanitise_task_name(s: str):
+    """Sanitise a string to be used as a Cylc task name.
+
+    Rules per
+    https://cylc.github.io/cylc-doc/stable/html/user-guide/writing-workflows/runtime.html#cylc.flow.unicode_rules.TaskNameValidator
+    The rules for valid task and family names:
+    * must start with: alphanumeric
+    * can only contain: alphanumeric, _, -, +, %, @
+    * cannot start with: _cylc
+    * cannot be: root
+
+    Note that actually there are a few more characters supported, see:
+    https://github.com/cylc/cylc-flow/issues/6288
+    """
+    # Ensure the first character is alphanumeric.
+    if not s[0].isalnum():
+        s = f"sanitised_{s}"
+    # Replace invalid characters with underscores.
+    s = "".join(c if c.isalnum() or c in "-+%@" else "_" for c in s)
+    # Ensure the name is not a reserved name.
+    if s.lower() == "root":
+        s = f"sanitised_{s}"
+    # Ensure the name does not start with "_cylc".
+    if s.lower().startswith("_cylc"):
+        s = f"sanitised_{s}"
+    return s

From 2816af68987f70fcd728591431581ff6cee8cd67 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 6 Aug 2024 10:50:49 +0100
Subject: [PATCH 05/90] Change recipe format to just use steps, and update
 infra for running them

I've updated one recipe for now, but they will all have to be converted.
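
The flat steps format implies a simple execution model: each step is a mapping that names an operator, its remaining keys become that operator's arguments, and one step's output is threaded into the next. A minimal, self-contained sketch of that shape (not CSET's actual implementation, which also resolves nested operator mappings such as the `constraint:` blocks in the recipes):

```python
# Toy operator registry standing in for CSET's real operators.
OPERATORS = {
    "double": lambda data: data * 2,
    "add": lambda data, amount: data + amount,
}

def run_steps(steps, step_input):
    """Run each step in order, threading the output into the next step."""
    for step in steps:
        operator = OPERATORS[step["operator"]]
        # Every key other than "operator" is a keyword argument for the step.
        kwargs = {k: v for k, v in step.items() if k != "operator"}
        step_input = operator(step_input, **kwargs)
    return step_input

assert run_steps([{"operator": "double"}, {"operator": "add", "amount": 1}], 3) == 7
```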
--- src/CSET/__init__.py | 38 ++++-------- src/CSET/_common.py | 14 ++--- src/CSET/_workflow_utils/run_cset_recipe.py | 43 +++---------- src/CSET/operators/__init__.py | 60 +++---------------- ...generic_surface_spatial_plot_sequence.yaml | 12 +--- 5 files changed, 32 insertions(+), 135 deletions(-) diff --git a/src/CSET/__init__.py b/src/CSET/__init__.py index 92c75fc19..4e4f5641e 100644 --- a/src/CSET/__init__.py +++ b/src/CSET/__init__.py @@ -50,6 +50,7 @@ def main(): "-i", "--input-dir", type=Path, + required=True, help="directory containing input data", ) parser_bake.add_argument( @@ -66,13 +67,6 @@ def main(): required=True, help="recipe file to read", ) - bake_step_control = parser_bake.add_mutually_exclusive_group() - bake_step_control.add_argument( - "--parallel-only", action="store_true", help="only run parallel steps" - ) - bake_step_control.add_argument( - "--collate-only", action="store_true", help="only run collation steps" - ) parser_bake.add_argument( "-s", "--style-file", type=Path, help="colour bar definition to use" ) @@ -197,29 +191,17 @@ def calculate_loglevel(args) -> int: def _bake_command(args, unparsed_args): from CSET._common import parse_variable_options - from CSET.operators import execute_recipe_collate, execute_recipe_parallel + from CSET.operators import execute_recipe recipe_variables = parse_variable_options(unparsed_args) - if not args.collate_only: - # Input dir is needed for parallel steps, but not collate steps. - if not args.input_dir: - raise ArgumentError("the following arguments are required: -i/--input-dir") - execute_recipe_parallel( - args.recipe, - args.input_dir, - args.output_dir, - recipe_variables, - args.style_file, - args.plot_resolution, - ) - if not args.parallel_only: - execute_recipe_collate( - args.recipe, - args.output_dir, - recipe_variables, - args.style_file, - args.plot_resolution, - ) + execute_recipe( + args.recipe, + args.input_dir, + args.output_dir, + recipe_variables, + args.style_file, + args.plot_resolution, + ) def _graph_command(args, unparsed_args): diff --git a/src/CSET/_common.py b/src/CSET/_common.py index f4db573b7..6535aff4f 100644 --- a/src/CSET/_common.py +++ b/src/CSET/_common.py @@ -72,13 +72,14 @@ def parse_recipe(recipe_yaml: Union[Path, str], variables: dict = None): except ruamel.yaml.parser.ParserError as err: raise ValueError("ParserError: Invalid YAML") from err - logging.debug(recipe) + logging.debug("Recipe before templating:\n%s", recipe) check_recipe_has_steps(recipe) if variables is not None: logging.debug("Recipe variables: %s", variables) recipe = template_variables(recipe, variables) + logging.debug("Recipe after templating:\n%s", recipe) return recipe @@ -103,16 +104,15 @@ def check_recipe_has_steps(recipe: dict): KeyError If needed recipe variables are not supplied. 
""" - parallel_steps_key = "parallel" if not isinstance(recipe, dict): raise TypeError("Recipe must contain a mapping.") - if "parallel" not in recipe: - raise ValueError("Recipe must contain a 'parallel' key.") + if "steps" not in recipe: + raise ValueError("Recipe must contain a 'steps' key.") try: - if len(recipe[parallel_steps_key]) < 1: - raise ValueError("Recipe must have at least 1 parallel step.") + if len(recipe["steps"]) < 1: + raise ValueError("Recipe must have at least 1 step.") except TypeError as err: - raise ValueError("'parallel' key must contain a sequence of steps.") from err + raise ValueError("'steps' key must contain a sequence of steps.") from err def slugify(s: str) -> str: diff --git a/src/CSET/_workflow_utils/run_cset_recipe.py b/src/CSET/_workflow_utils/run_cset_recipe.py index ba7d22bbe..982e72539 100755 --- a/src/CSET/_workflow_utils/run_cset_recipe.py +++ b/src/CSET/_workflow_utils/run_cset_recipe.py @@ -54,7 +54,8 @@ def recipe_id(): ) p.check_returncode() id = p.stdout.decode(sys.stdout.encoding).strip() - return id + model_number = os.environ["MODEL_NUMBER"] + return f"m{model_number}_{id}" def output_directory(): @@ -67,7 +68,8 @@ def data_directory(): """Get the input data directory for the cycle.""" share_directory = os.environ["CYLC_WORKFLOW_SHARE_DIR"] cycle_point = os.environ["CYLC_TASK_CYCLE_POINT"] - return f"{share_directory}/cycle/{cycle_point}/data" + model_number = os.environ["MODEL_NUMBER"] + return f"{share_directory}/cycle/{cycle_point}/data/{model_number}" def create_diagnostic_archive(output_directory): @@ -83,33 +85,7 @@ def create_diagnostic_archive(output_directory): archive.write(file, arcname=file.relative_to(output_directory)) -# Not covered by tests as will soon be removed in #765. -def parallel(): # pragma: no cover - """Process raw data in parallel.""" - logging.info("Pre-processing data into intermediate form.") - try: - subprocess.run( - ( - "cset", - "-v", - "bake", - f"--recipe={recipe_file()}", - f"--input-dir={data_directory()}", - f"--output-dir={output_directory()}", - f"--style-file={os.getenv('COLORBAR_FILE', '')}", - f"--plot-resolution={os.getenv('PLOT_RESOLUTION', '')}", - "--parallel-only", - ), - check=True, - env=subprocess_env(), - ) - except subprocess.CalledProcessError: - logging.error("cset bake exited non-zero while processing.") - raise - - -# Not covered by tests as will soon be removed in #765. -def collate(): # pragma: no cover +def run_recipe_steps(): """Collate processed data together and produce output plot. If the intermediate directory doesn't exist then we are running a simple @@ -126,10 +102,10 @@ def collate(): # pragma: no cover "-v", "bake", f"--recipe={recipe_file()}", + f"--input-dir={data_directory()}", f"--output-dir={output_directory()}", f"--style-file={os.getenv('COLORBAR_FILE', '')}", f"--plot-resolution={os.getenv('PLOT_RESOLUTION', '')}", - "--collate-only", ), check=True, env=subprocess_env(), @@ -142,9 +118,4 @@ def collate(): # pragma: no cover def run(): """Run workflow script.""" - # Check if we are running in parallel or collate mode. 
-    bake_mode = os.getenv("CSET_BAKE_MODE")
-    if bake_mode == "parallel":
-        parallel()
-    elif bake_mode == "collate":
-        collate()
+    run_recipe_steps()
diff --git a/src/CSET/operators/__init__.py b/src/CSET/operators/__init__.py
index 8101457dc..a1c9cc292 100644
--- a/src/CSET/operators/__init__.py
+++ b/src/CSET/operators/__init__.py
@@ -46,8 +46,7 @@
    "collapse",
    "constraints",
    "convection",
-    "execute_recipe_collate",
-    "execute_recipe_parallel",
+    "execute_recipe",
    "filters",
    "get_operator",
    "misc",
@@ -152,8 +151,8 @@ def _run_steps(
) -> None:
    """Execute the steps in a recipe."""
    original_working_directory = Path.cwd()
-    os.chdir(output_directory)
    try:
+        os.chdir(output_directory)
        logger = logging.getLogger()
        diagnostic_log = logging.FileHandler(
            filename="CSET.log", mode="w", encoding="UTF-8"
        )
@@ -177,7 +176,7 @@
    os.chdir(original_working_directory)


-def execute_recipe_parallel(
+def execute_recipe(
    recipe_yaml: Union[Path, str],
    input_directory: Path,
    output_directory: Path,
@@ -185,7 +184,7 @@
    style_file: Path = None,
    plot_resolution: int = None,
) -> None:
-    """Parse and executes the parallel steps from a recipe file.
+    """Parse and execute the steps from a recipe file.

    Parameters
    ----------
@@ -216,58 +215,13 @@
    TypeError
        The provided recipe is not a stream or Path.
    """
-    if recipe_variables is None:
-        recipe_variables = {}
    recipe = parse_recipe(recipe_yaml, recipe_variables)
    step_input = Path(input_directory).absolute()
-    # Create output directory, and an inter-cycle intermediate directory.
+    # Create output directory.
    try:
-        (output_directory / "intermediate").mkdir(parents=True, exist_ok=True)
+        output_directory.mkdir(parents=True, exist_ok=True)
    except (FileExistsError, NotADirectoryError) as err:
        logging.error("Output directory is a file. %s", output_directory)
        raise err
-    steps = recipe["parallel"]
+    steps = recipe["steps"]
    _run_steps(recipe, steps, step_input, output_directory, style_file, plot_resolution)
-
-
-def execute_recipe_collate(
-    recipe_yaml: Union[Path, str],
-    output_directory: Path,
-    recipe_variables: dict = None,
-    style_file: Path = None,
-    plot_resolution: int = None,
-) -> None:
-    """Parse and execute the collation steps from a recipe file.
-
-    Parameters
-    ----------
-    recipe_yaml: Path or str
-        Path to a file containing, or string of, a recipe's YAML describing the
-        operators that need running. If a Path is provided it is opened and
-        read.
-    output_directory: Path
-        Pathlike indicating desired location of output. Must already exist.
-    recipe_variables: dict
-        Dictionary of variables for the recipe.
-    style_file: Path, optional
-        Path to a style file.
-    plot_resolution: int, optional
-        Resolution of plots in dpi.
-
-    Raises
-    ------
-    ValueError
-        The recipe is not well formed.
-    TypeError
-        The provided recipe is not a stream or Path.
-    """
-    if recipe_variables is None:
-        recipe_variables = {}
-    output_directory = Path(output_directory).resolve()
-    assert output_directory.is_dir()
-    recipe = parse_recipe(recipe_yaml, recipe_variables)
-    # If collate doesn't exist treat it as having no steps.
-    steps = recipe.get("collate", [])
-    _run_steps(
-        recipe, steps, output_directory, output_directory, style_file, plot_resolution
-    )
diff --git a/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml b/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml
index cb401207f..7f3a114fa 100644
--- a/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml
+++ b/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml
@@ -2,7 +2,7 @@ category: Surface Spatial Plot
title: Surface $VARNAME
description: Extracts and plots the surface $VARNAME from a file.

-parallel:
+steps:
  - operator: read.read_cube
    constraint:
      operator: constraints.combine_constraints
@@ -16,16 +16,6 @@
      operator: constraints.generate_level_constraint
      coordinate: "pressure"
      levels: []
-      validity_time_constraint:
-        operator: constraints.generate_time_constraint
-        time_start: $VALIDITY_TIME
-
-  - operator: write.write_cube_to_nc
-    filename: intermediate/surface_field
-
-collate:
-  - operator: read.read_cube
-    filename_pattern: intermediate/*.nc

  - operator: plot.spatial_pcolormesh_plot
    sequence_coordinate: time

From 7ec7031a4d032ffc4a67bdc57bb65fff197cac4c Mon Sep 17 00:00:00 2001
From: James Frost
Date: Fri, 9 Aug 2024 11:23:37 +0100
Subject: [PATCH 06/90] Add cycling for trials

The final cycle point tasks are skipped if doing a continuous trial
without a defined end date.
---
 cset-workflow/flow.cylc           | 25 ++++++++++++++++++++++++-
 cset-workflow/meta/rose-meta.conf | 11 ++++++-----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc
index 00a74bc96..b1cad3ce9 100644
--- a/cset-workflow/flow.cylc
+++ b/cset-workflow/flow.cylc
@@ -9,8 +9,18 @@ URL = https://metoffice.github.io/CSET

[scheduling]
    runahead limit = P{{CSET_RUNAHEAD_LIMIT}}
+
+    # Initial and final cycle points cover the entire period of interest.
+    {% if CSET_CYCLING_MODE == "case_study" %}
    initial cycle point = {{ min(CSET_CASE_DATES) }}
    final cycle point = {{ max(CSET_CASE_DATES) }}
+    {% elif CSET_CYCLING_MODE == "trial" %}
+    initial cycle point = {{CSET_TRIAL_START_DATE}}
+    # End date can be blank.
+    {% if CSET_TRIAL_END_DATE %}
+    final cycle point = {{CSET_TRIAL_END_DATE}}
+    {% endif %}
+    {% endif %}

    [[graph]]
        # Only runs on the first cycle.
@@ -20,18 +30,31 @@ URL = https://metoffice.github.io/CSET
            install_local_cset & install_website_skeleton => setup_complete
        """

+        {% if CSET_CYCLING_MODE == "case_study" %}
        # Runs for every forecast initiation time to process the data in parallel.
        {% for date in CSET_CASE_DATES %}
        R1/{{date}} = """
-            setup_complete[^] => FETCH_DATA:succeed-all => fetch_complete => PROCESS:finish-all => housekeeping_raw
+            setup_complete[^] => FETCH_DATA:succeed-all => fetch_complete
+            fetch_complete => PROCESS:finish-all => housekeeping_raw
        """
        {% endfor %}
+        {% elif CSET_CYCLING_MODE == "trial" %}
+        # Analyse data from each forecast.
+        {{CSET_TRIAL_CYCLE_PERIOD}} = """
+            setup_complete[^] => FETCH_DATA:succeed-all => fetch_complete
+            fetch_complete => PROCESS:finish-all => housekeeping_raw
+        """
+        {% endif %}

+        # Can only run tasks on the final cycle point if it exists, so skip for
+        # continuous trials.
+        {% if CSET_CYCLING_MODE != "trial" or CSET_TRIAL_END_DATE %}
        # Only runs on the final cycle.
        R1/$ = """
            housekeeping_raw => finish_website => send_email
            housekeeping_raw => housekeeping_full
        """
+        {% endif %}

[runtime]
    [[root]]
diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf
index 3e8a3228f..1aff2c4bb 100644
--- a/cset-workflow/meta/rose-meta.conf
+++ b/cset-workflow/meta/rose-meta.conf
@@ -253,11 +253,12 @@ compulsory=true
ns=Models and Cases
title=Cycling mode
description=Process case studies, or a continuous trial.
-values="Case Study", "Trial"
-trigger=template variables=CSET_CASE_DATES: "Case Study";
-        template variables=CSET_TRIAL_START_DATE: "Trial";
-        template variables=CSET_TRIAL_END_DATE: "Trial";
-        template variables=CSET_TRIAL_CYCLE_PERIOD: "Trial";
+values="case_study", "trial"
+value-titles=Case Study, Trial
+trigger=template variables=CSET_CASE_DATES: "case_study";
+        template variables=CSET_TRIAL_START_DATE: "trial";
+        template variables=CSET_TRIAL_END_DATE: "trial";
+        template variables=CSET_TRIAL_CYCLE_PERIOD: "trial";
compulsory=true
sort-key=a1

From 18f8ae5225700d150c2bd6c2bc91b091a8aee6b2 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Fri, 9 Aug 2024 12:21:27 +0100
Subject: [PATCH 07/90] Thread model name through to output

---
 cset-workflow/includes/plot_spatial_surface_model_field.cylc | 5 ++++-
 src/CSET/_workflow_utils/run_cset_recipe.py                  | 5 -----
 src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml  | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/cset-workflow/includes/plot_spatial_surface_model_field.cylc b/cset-workflow/includes/plot_spatial_surface_model_field.cylc
index f0d69759d..8da61262c 100644
--- a/cset-workflow/includes/plot_spatial_surface_model_field.cylc
+++ b/cset-workflow/includes/plot_spatial_surface_model_field.cylc
@@ -6,7 +6,10 @@
        inherit = PROCESS
        [[[environment]]]
            CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml"
-            CSET_ADDOPTS = "--VARNAME={{field}}"
+            CSET_ADDOPTS = """
+                --VARNAME='{{field}}'
+                --MODEL_NAME='{{models[model_number-1]["name"]}}'
+            """
            MODEL_NUMBER = {{model_number}}
{% endfor %}
{% endfor %}
diff --git a/src/CSET/_workflow_utils/run_cset_recipe.py b/src/CSET/_workflow_utils/run_cset_recipe.py
index 982e72539..a76bdfd4f 100755
--- a/src/CSET/_workflow_utils/run_cset_recipe.py
+++ b/src/CSET/_workflow_utils/run_cset_recipe.py
@@ -17,11 +17,6 @@
def subprocess_env():
    """Create a dictionary of amended environment variables for subprocess."""
    env_mapping = dict(os.environ)
-    cycle_point = env_mapping["CYLC_TASK_CYCLE_POINT"]
-    # Add validity time based on cycle point.
-    env_mapping["CSET_ADDOPTS"] = (
-        f"{os.getenv('CSET_ADDOPTS', '')} --VALIDITY_TIME={cycle_point}"
-    )
    return env_mapping

diff --git a/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml b/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml
index 7f3a114fa..ed6999619 100644
--- a/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml
+++ b/src/CSET/recipes/generic_surface_spatial_plot_sequence.yaml
@@ -1,6 +1,6 @@
category: Surface Spatial Plot
-title: Surface $VARNAME
-description: Extracts and plots the surface $VARNAME from a file.
+title: $MODEL_NAME Surface $VARNAME
+description: Extracts and plots the surface $VARNAME for all times in $MODEL_NAME.

From c0827cf7a552a3a7752ce181dd474ed82541a94b Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 20 Aug 2024 09:23:00 +0100
Subject: [PATCH 08/90] Remove unneeded recipes

They have all been replaced by the more generic versions that work for
both the UM and LFRic.
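
The generic recipes are parameterised with $-style placeholders ($VARNAME, $MODEL_NAME), filled from the --KEY=value pairs passed via CSET_ADDOPTS and parsed by parse_variable_options. A rough equivalent of the substitution, assuming plain string.Template semantics rather than CSET's exact templating code:

```python
# Rough equivalent of recipe variable substitution; illustrative only.
from string import Template

addopts = {"VARNAME": "air_temperature", "MODEL_NAME": "Model 01"}
print(Template("$MODEL_NAME Surface $VARNAME").safe_substitute(addopts))
# Model 01 Surface air_temperature
```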
--- cset-workflow/meta/rose-meta.conf | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 1aff2c4bb..144928a5e 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -1598,14 +1598,6 @@ type=python_boolean compulsory=true sort-key=surface2 -[template variables=LFRIC_PLOT_SPATIAL_SURFACE_MODEL_FIELD] -ns=Diagnostics/Quicklook -description=Create plots for the specified surface fields for structured LFRic data. -help=See includes/lfric_plot_spatial_surface_model_field.cylc -type=python_boolean -compulsory=true -sort-key=surface2 - [template variables=DOMAIN_MEAN_SURFACE_TIME_SERIES] ns=Diagnostics/Quicklook description=Create time series plot of surface field domain mean. @@ -1614,14 +1606,6 @@ type=python_boolean compulsory=true sort-key=surface3 -[template variables=LFRIC_DOMAIN_MEAN_SURFACE_TIME_SERIES] -ns=Diagnostics/Quicklook -description=Create time series plot of surface field domain mean for structured LFRic data. -help=See includes/lfric_deterministic_domain_mean_surface_time_series.cylc -type=python_boolean -compulsory=true -sort-key=surface3 - [template variables=METPLUS_POINT_STAT] ns=Diagnostics/Verification description=Run METplus point stat in the workflow. From e3ad9676213c0cb9a2b28b98c979337f03f423dd Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 12:09:07 +0100 Subject: [PATCH 09/90] Replace dots with p in task names, to support floats --- cset-workflow/lib/python/jinja_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py index 30be43aa6..b00caa200 100644 --- a/cset-workflow/lib/python/jinja_utils.py +++ b/cset-workflow/lib/python/jinja_utils.py @@ -103,6 +103,8 @@ def sanitise_task_name(s: str): # Ensure the first character is alphanumeric. if not s[0].isalnum(): s = f"sanitised_{s}" + # Specifically replace `.` with `p`, as in 3p5. + s = s.replace(".", "p") # Replace invalid characters with underscores. s = "".join(c if c.isalnum() or c in "-+%@" else "_" for c in s) # Ensure the name is not a reserved name. 
From f3f89a4eedf528a699d7f202f6cf0d3e09d0bc0f Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 12:09:42 +0100 Subject: [PATCH 10/90] Convert domain_mean_time_series include file --- ...istic_domain_mean_surface_time_series.cylc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc b/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc index aaa5c8ffc..69b9cdac3 100644 --- a/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc @@ -1,16 +1,16 @@ {% if DOMAIN_MEAN_SURFACE_TIME_SERIES %} -{% for model_field in SURFACE_MODEL_FIELDS %} +{% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} [runtime] - [[pre_process_domain_mean_surface_time_series_{{model_field}}]] - inherit = PARALLEL + [[generic_surface_domain_mean_time_series_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_surface_domain_mean_time_series.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}}" - - [[collate_domain_mean_surface_time_series_{{model_field}}]] - inherit = COLLATE - [[[environment]]] - CSET_RECIPE_NAME = "generic_surface_domain_mean_time_series.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}}" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endif %} From 2fe51c880827d9f56a3cb84526406a06604e590e Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 12:11:11 +0100 Subject: [PATCH 11/90] Convert domain mean time series recipe --- .../generic_surface_domain_mean_time_series.yaml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml b/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml index caba04a77..e99a6e31a 100644 --- a/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml +++ b/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml @@ -2,8 +2,7 @@ category: Time Series title: Domain mean surface $VARNAME time series description: Plots a time series of the domain mean surface $VARNAME. -# Parallel steps. -parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints @@ -17,24 +16,11 @@ parallel: operator: constraints.generate_level_constraint coordinate: "pressure" levels: [] - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME - operator: collapse.collapse coordinate: [grid_latitude, grid_longitude] method: MEAN - # Save domain meaned variable to a file per validity time. - - operator: write.write_cube_to_nc - filename: intermediate/domain_mean - -# Collation steps. -# Reads in intermediate cube and plots it. -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - # Make a single NetCDF with all the data inside it. 
- operator: write.write_cube_to_nc overwrite: True From 4ec94c3070b697c21ae716f5e9f581d5b3fbfe99 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 10:31:01 +0100 Subject: [PATCH 12/90] Only pass WEB_DIR to tasks that need it --- cset-workflow/flow.cylc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index b1cad3ce9..f77c73fe1 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -83,7 +83,6 @@ URL = https://metoffice.github.io/CSET {% endif %} LOGLEVEL = {{LOGLEVEL}} - WEB_DIR = {{WEB_DIR}} COLORBAR_FILE = {{COLORBAR_FILE}} PLOT_RESOLUTION = {{PLOT_RESOLUTION}} @@ -131,6 +130,8 @@ URL = https://metoffice.github.io/CSET [[install_website_skeleton]] # Copies the static files that make up the web interface. + [[[environment]]] + WEB_DIR = {{WEB_DIR}} {% for model in models %} [[fetch_fcst_m{{model["number"]}}]] @@ -167,6 +168,7 @@ URL = https://metoffice.github.io/CSET platform = localhost [[[environment]]] WEB_ADDR = {{WEB_ADDR}} + WEB_DIR = {{WEB_DIR}} # Include files bring their own graph and runtime sections. From baa5400efe44ad6c01a30b5d7c68b0635f3e783f Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 12:14:48 +0100 Subject: [PATCH 13/90] Remove install_local_cset task from workflow --- cset-workflow/flow.cylc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index f77c73fe1..f1efed676 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -25,9 +25,7 @@ URL = https://metoffice.github.io/CSET [[graph]] # Only runs on the first cycle. R1/^ = """ - build_conda => install_local_cset - build_conda => install_website_skeleton - install_local_cset & install_website_skeleton => setup_complete + build_conda => install_website_skeleton => setup_complete """ {% if CSET_CYCLING_MODE == "case_study" %} From c65681592d38ac8407882605eef36d70108b8a76 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 12:44:36 +0100 Subject: [PATCH 14/90] Differentiate output directories for different case dates Use cycle point for deterministic resolving. --- src/CSET/_workflow_utils/run_cset_recipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CSET/_workflow_utils/run_cset_recipe.py b/src/CSET/_workflow_utils/run_cset_recipe.py index a76bdfd4f..51f29637d 100755 --- a/src/CSET/_workflow_utils/run_cset_recipe.py +++ b/src/CSET/_workflow_utils/run_cset_recipe.py @@ -56,7 +56,8 @@ def recipe_id(): def output_directory(): """Get the plot output directory for the recipe.""" share_directory = os.environ["CYLC_WORKFLOW_SHARE_DIR"] - return f"{share_directory}/web/plots/{recipe_id()}" + cycle_point = os.environ["CYLC_TASK_CYCLE_POINT"] + return f"{share_directory}/web/plots/{recipe_id()}_{cycle_point}" def data_directory(): From 37e0b357543c7006f26e5f8c76dfda6af94c12f9 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 13:56:15 +0100 Subject: [PATCH 15/90] Remove duplicated include file It was also invalid due to referencing non-existent variables. 
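
The converted include files below derive Cylc task names from field names using sanitise_task_name() from jinja_utils.py (added in PATCH 04 and extended in PATCH 09). Its expected behaviour for typical field names, assuming cset-workflow/lib/python is on the module search path:

```python
# Expected behaviour of sanitise_task_name(); assumes lib/python is importable.
from jinja_utils import sanitise_task_name

assert sanitise_task_name("m01s03i236") == "m01s03i236"
assert sanitise_task_name("temperature_at_1.5m") == "temperature_at_1p5m"
assert sanitise_task_name("surface upward latent heat flux") == "surface_upward_latent_heat_flux"
```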
--- .../deterministic_domain_histogram_series.cylc | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 cset-workflow/includes/deterministic_domain_histogram_series.cylc diff --git a/cset-workflow/includes/deterministic_domain_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_histogram_series.cylc deleted file mode 100644 index 93db04145..000000000 --- a/cset-workflow/includes/deterministic_domain_histogram_series.cylc +++ /dev/null @@ -1,16 +0,0 @@ -{% if DOMAIN_HISTOGRAM_SERIES %} -[runtime] -{% for model_field in MODEL_LEVEL_MODEL_FIELDS %} - [[pre_process_deterministic_domain_histogram_series_{{model_field}}]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "generic_histogram_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --MLEVEL='{{UM_MODEL_LEVELS}}'" - - [[collate_deterministic_domain_histogram_series_{{model_field}}]] - inherit = COLLATE - [[[environment]]] - CSET_RECIPE_NAME = "generic_histogram_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --MLEVEL='{{UM_MODEL_LEVELS}}'" -{% endfor %} -{% endif %} From 8ae9b997d6a945f83393181c0154ddad8bf92557 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 13:58:25 +0100 Subject: [PATCH 16/90] Convert more recipes and include files --- ...stic_single_point_surface_time_series.cylc | 23 +++++++++++-------- ...neric_surface_domain_mean_time_series.yaml | 2 +- ...eric_surface_single_point_time_series.yaml | 15 +----------- 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc b/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc index 998018ee2..7f6ced215 100644 --- a/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc +++ b/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc @@ -1,16 +1,19 @@ {% if SURFACE_SINGLE_POINT_TIME_SERIES %} -{% for model_field in SURFACE_MODEL_FIELDS %} +{% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} [runtime] - [[pre_process_surface_single_point_time_series_{{model_field}}]] - inherit = PARALLEL + [[generic_surface_single_point_time_series_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_surface_single_point_time_series.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}} --LONGITUDE_POINT={{LONGITUDE_POINT}} --LATITUDE_POINT={{LATITUDE_POINT}} --SINGLE_POINT_METHOD={{SINGLE_POINT_METHOD}}" - - [[collate_surface_single_point_time_series_{{model_field}}]] - inherit = COLLATE - [[[environment]]] - CSET_RECIPE_NAME = "generic_surface_single_point_time_series.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}} --LONGITUDE_POINT={{LONGITUDE_POINT}} --LATITUDE_POINT={{LATITUDE_POINT}} --SINGLE_POINT_METHOD={{SINGLE_POINT_METHOD}}" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + --LONGITUDE_POINT='{{LONGITUDE_POINT}}' + --LATITUDE_POINT='{{LATITUDE_POINT}}' + --SINGLE_POINT_METHOD='{{SINGLE_POINT_METHOD}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endif %} diff --git a/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml b/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml index e99a6e31a..c75a64dfb 100644 --- a/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml +++ b/src/CSET/recipes/generic_surface_domain_mean_time_series.yaml 
@@ -1,5 +1,5 @@ category: Time Series -title: Domain mean surface $VARNAME time series +title: $MODEL_NAME Domain mean surface $VARNAME time series description: Plots a time series of the domain mean surface $VARNAME. steps: diff --git a/src/CSET/recipes/generic_surface_single_point_time_series.yaml b/src/CSET/recipes/generic_surface_single_point_time_series.yaml index ca39d2dea..a7ac998f9 100644 --- a/src/CSET/recipes/generic_surface_single_point_time_series.yaml +++ b/src/CSET/recipes/generic_surface_single_point_time_series.yaml @@ -1,5 +1,5 @@ category: Time Series -title: Time series of $VARNAME at $LATITUDE_POINT N, $LONGITUDE_POINT E +title: $MODEL_NAME Time series of $VARNAME at $LATITUDE_POINT N, $LONGITUDE_POINT E description: Plots a time series of the surface $VARNAME at a selected gridpoint. # Parallel steps. @@ -14,25 +14,12 @@ parallel: operator: constraints.generate_level_constraint levels: [] coordinate: "pressure" - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME - operator: regrid.regrid_to_single_point lat_pt: $LATITUDE_POINT lon_pt: $LONGITUDE_POINT method: $SINGLE_POINT_METHOD - # Save single-point variable to a file per validity time. - - operator: write.write_cube_to_nc - filename: intermediate/single_point_values - -# Collation steps. -# Reads in intermediate cube and plots it. -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - # Make a single NetCDF with all the data inside it. - operator: write.write_cube_to_nc overwrite: True From 7b764f9e7fb666e54aba7999eaf3dab6d072da9c Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 14:00:33 +0100 Subject: [PATCH 17/90] Convert recipe and include file --- ...istic_domain_surface_histogram_series.cylc | 20 +++++++++---------- .../generic_surface_histogram_series.yaml | 11 ---------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc index 29908126a..e031b892d 100644 --- a/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc +++ b/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc @@ -1,16 +1,16 @@ {% if DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD %} +{% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} [runtime] -{% for model_field in SURFACE_MODEL_FIELDS %} - [[pre_process_deterministic_domain_surface_histogram_series_{{model_field}}]] - inherit = PARALLEL + [[generic_surface_histogram_series_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_surface_histogram_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}'" - - [[collate_deterministic_domain_surface_histogram_series_{{model_field}}]] - inherit = COLLATE - [[[environment]]] - CSET_RECIPE_NAME = "generic_surface_histogram_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}'" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endif %} diff --git a/src/CSET/recipes/generic_surface_histogram_series.yaml b/src/CSET/recipes/generic_surface_histogram_series.yaml index 9e7812837..2425c2e34 100644 --- a/src/CSET/recipes/generic_surface_histogram_series.yaml +++ 
b/src/CSET/recipes/generic_surface_histogram_series.yaml @@ -14,22 +14,11 @@ parallel: variable_constraint: operator: constraints.generate_var_constraint varname: $VARNAME - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME pressure_level_constraint: operator: constraints.generate_level_constraint coordinate: pressure levels: [] - - operator: write.write_cube_to_nc - filename: intermediate/histogram - - -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - - operator: write.write_cube_to_nc overwrite: True From a87ed7ee34a48f34dfaf30ca72002a7f9ec5e402 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 20 Aug 2024 16:00:07 +0100 Subject: [PATCH 18/90] Convert more recipes --- .../deterministic_plot_cape_ratio.cylc | 15 ++++--- .../deterministic_plot_inflow_properties.cylc | 15 ++++--- src/CSET/recipes/CAPE_ratio_plot.yaml | 44 +++++++------------ .../recipes/inflow_layer_properties_plot.yaml | 24 +++------- 4 files changed, 38 insertions(+), 60 deletions(-) diff --git a/cset-workflow/includes/deterministic_plot_cape_ratio.cylc b/cset-workflow/includes/deterministic_plot_cape_ratio.cylc index 5d26336f9..ee0f0be65 100644 --- a/cset-workflow/includes/deterministic_plot_cape_ratio.cylc +++ b/cset-workflow/includes/deterministic_plot_cape_ratio.cylc @@ -1,12 +1,13 @@ {% if DETERMINISTIC_PLOT_CAPE_RATIO %} +{% for model in models %} [runtime] - [[parallel_plot_cape_ratio]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "CAPE_ratio_plot.yaml" - - [[collate_plot_cape_ratio]] - inherit = COLLATE + [[plot_cape_ratio_m{{model["number"]}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "CAPE_ratio_plot.yaml" + CSET_ADDOPTS = """ + --MODEL_NAME='{{model["name"]}}' + """ + MODEL_NUMBER = {{model["number"]}} +{% endfor %} {% endif %} diff --git a/cset-workflow/includes/deterministic_plot_inflow_properties.cylc b/cset-workflow/includes/deterministic_plot_inflow_properties.cylc index e3c8d007e..9593172b7 100644 --- a/cset-workflow/includes/deterministic_plot_inflow_properties.cylc +++ b/cset-workflow/includes/deterministic_plot_inflow_properties.cylc @@ -1,12 +1,13 @@ {% if DETERMINISTIC_PLOT_INFLOW_PROPERTIES %} +{% for model in models %} [runtime] - [[parallel_inflow_layer_properties]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "inflow_layer_properties_plot.yaml" - - [[collate_inflow_layer_properties]] - inherit = COLLATE + [[inflow_layer_properties_plot_m{{model["number"]}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "inflow_layer_properties_plot.yaml" + CSET_ADDOPTS = """ + --MODEL_NAME='{{model["name"]}}' + """ + MODEL_NUMBER = {{model["number"]}} +{% endfor %} {% endif %} diff --git a/src/CSET/recipes/CAPE_ratio_plot.yaml b/src/CSET/recipes/CAPE_ratio_plot.yaml index 6095f5660..1009c780b 100644 --- a/src/CSET/recipes/CAPE_ratio_plot.yaml +++ b/src/CSET/recipes/CAPE_ratio_plot.yaml @@ -1,43 +1,29 @@ category: Diagnostics -title: CAPE ratio plot +title: $MODEL_NAME CAPE ratio plot description: | Extracts data required for, and calculates the CAPE ratio diagnostic, plotting on a map. 
-parallel: +steps: - operator: read.read_cubes - constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME + - operator: convection.cape_ratio SBCAPE: - operator: filters.filter_cubes - constraint: - operator: constraints.generate_stash_constraint - stash: m01s20i114 + operator: filters.filter_cubes + constraint: + operator: constraints.generate_var_constraint + varname: m01s20i114 MUCAPE: - operator: filters.filter_cubes - constraint: - operator: constraints.generate_stash_constraint - stash: m01s20i112 + operator: filters.filter_cubes + constraint: + operator: constraints.generate_var_constraint + varname: m01s20i112 MUCIN: - operator: filters.filter_cubes - constraint: - operator: constraints.generate_stash_constraint - stash: m01s20i113 + operator: filters.filter_cubes + constraint: + operator: constraints.generate_var_constraint + varname: m01s20i113 MUCIN_thresh: -75 - - operator: filters.filter_cubes - constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME - - - operator: write.write_cube_to_nc - filename: intermediate/cape_ratio - -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - - operator: write.write_cube_to_nc overwrite: True diff --git a/src/CSET/recipes/inflow_layer_properties_plot.yaml b/src/CSET/recipes/inflow_layer_properties_plot.yaml index c467e4020..a58e03818 100644 --- a/src/CSET/recipes/inflow_layer_properties_plot.yaml +++ b/src/CSET/recipes/inflow_layer_properties_plot.yaml @@ -1,37 +1,27 @@ category: Diagnostics -title: Inflow layer properties plot +title: $MODEL_NAME Inflow layer properties plot description: | Extracts data required for, and calculates the Inflow properties diagnostic, plotting on a spatial map. parallel: - operator: read.read_cubes - constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME - operator: convection.inflow_layer_properties EIB: operator: filters.filter_cubes constraint: - operator: constraints.generate_stash_constraint - stash: m01s20i119 + operator: constraints.generate_var_constraint + varname: m01s20i119 BLheight: operator: filters.filter_cubes constraint: - operator: constraints.generate_stash_constraint - stash: m01s00i025 + operator: constraints.generate_var_constraint + varname: m01s00i025 Orography: operator: filters.filter_cubes constraint: - operator: constraints.generate_stash_constraint - stash: m01s00i033 - - - operator: write.write_cube_to_nc - filename: intermediate/inflow_layer - -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc + operator: constraints.generate_var_constraint + varname: m01s00i033 - operator: plot.spatial_contour_plot From 39b9816383b426bec2f64d9808654193cc560745 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 12:21:49 +0100 Subject: [PATCH 19/90] Split rose metadata into multiple files Also rearrange pressure level metadata. 
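
For reference, the level selection convention documented in the new
metadata is the one understood by constraints.generate_level_constraint:
"*" keeps every level on the coordinate, a list of integers keeps just
those levels, and [] keeps only single-level fields. A rough sketch of
those semantics follows; it is not the actual implementation (see
src/CSET/operators/constraints.py, whose comment typo this patch also
fixes), and the [] handling in particular is an assumption here:

    import iris

    def level_constraint_sketch(coordinate: str, levels) -> iris.Constraint:
        # "*" matches every value of the coordinate.
        if levels == "*":
            return iris.Constraint(**{coordinate: lambda cell: True})
        # [] is assumed to select cubes lacking the coordinate entirely,
        # i.e. single-level fields.
        if not levels:
            return iris.Constraint(
                cube_func=lambda cube: not cube.coords(coordinate)
            )
        # Otherwise keep only the listed levels.
        return iris.Constraint(**{coordinate: lambda cell: cell.point in levels})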
--- cset-workflow/meta/diagnostics/rose-meta.conf | 451 ++++++++++++ cset-workflow/meta/rose-meta.conf | 657 +++--------------- src/CSET/operators/constraints.py | 2 +- 3 files changed, 555 insertions(+), 555 deletions(-) create mode 100644 cset-workflow/meta/diagnostics/rose-meta.conf diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf new file mode 100644 index 000000000..8b3d47adb --- /dev/null +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -0,0 +1,451 @@ +################################################################################ +# Diagnostics +################################################################################ + +[template variables] + +# Surface fields. +[template variables=SURFACE_MODEL_FIELDS] +ns=Diagnostics/Quicklook +title=Surface model fields +description=Per model field names. +help=Variable names for surface variables. The names across a row should match + the same physical phenomenon, and use the appropriate standard, long, or + field name, or the STASH code for each model. Blank entries indicate that a + model does not have that phenomenon, so it will be skipped. + + Ignore the boxes for models that are not enabled. +compulsory=true +element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 +type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted +length=: +sort-key=surface1 + +[template variables=PLOT_SPATIAL_SURFACE_MODEL_FIELD] +ns=Diagnostics/Quicklook +description=Create plots for the specified surface fields. +help=See includes/plot_spatial_surface_model_field.cylc +type=python_boolean +compulsory=true +sort-key=surface2 + +[template variables=DOMAIN_MEAN_SURFACE_TIME_SERIES] +ns=Diagnostics/Quicklook +description=Create time series plot of surface field domain mean. +help=See includes/deterministic_domain_mean_surface_time_series.cylc +type=python_boolean +compulsory=true +sort-key=surface3 + +[template variables=DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD] +ns=Diagnostics/Quicklook +description=Create a series of histogram plots for selected surface fields for each cycle time. +help=See includes/deterministic_domain_surface_histogram_series.cylc. This diagnostic requires the user to also enable and enter the SURFACE_MODEL_FIELDS. The series_coordinate in the recipe is set to "time", but can be switched to any other coordinate. +type=python_boolean +compulsory=true +sort-key=surface4 + +# Pressure level fields. +[template variables=PRESSURE_LEVEL_MODEL_FIELDS] +ns=Diagnostics/Quicklook +title=Pressure level model fields +description=Per model field names. +help=Variable names for pressure level variables. The names across a row should + match the same physical phenomenon, and use the appropriate standard, long, + or field name, or the STASH code for each model. Blank entries indicate that + a model does not have that phenomenon, so it will be skipped. + + Ignore the boxes for models that are not enabled. +compulsory=true +element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 +type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted +length=: +sort-key=pressure1 + +[template variables=PRESSURE_LEVELS] +ns=Diagnostics/Quicklook +description=List of pressure levels to generate plots for. +help=A list of integer pressure levels in hPa in python list format. 
E.g: [850, 1000]
+type=python_list
+compulsory=true
+sort-key=pressure2
+
+[template variables=PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD]
+ns=Diagnostics/Quicklook
+description=Create plots for the specified pressure level fields.
+help=See includes/generic_spatial_plevel_model_field.cylc
+type=python_boolean
+compulsory=true
+sort-key=pressure3
+
+[template variables=DOMAIN_MEAN_VERTICAL_PROFILE_SERIES]
+ns=Diagnostics/Quicklook
+description=Domain averaged vertical profile of pressure levels for each time.
+help=See includes/deterministic_domain_mean_vertical_profile_series.cylc
+type=python_boolean
+compulsory=true
+sort-key=pressure3
+
+# Model-level fields.
+# Temporarily disabled while we figure out the LFRic equivalent.
+
+# [template variables=MODEL_LEVEL_MODEL_FIELDS]
+# ns=Diagnostics/Quicklook
+# title=Model level model fields
+# description=Per model field names.
+# help=Variable names for model level variables. The names across a row should
+#   match the same physical phenomenon, and use the appropriate standard, long,
+#   or field name, or the STASH code for each model. Blank entries indicate that
+#   a model does not have that phenomenon, so it will be skipped.
+
+#   Ignore the boxes for models that are not enabled.
+# compulsory=true
+# element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10
+# type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted
+# length=:
+# sort-key=modellevel1
+
+# [template variables=UM_MODEL_LEVELS]
+# ns=Diagnostics
+# description=List of UM model levels to generate plots for.
+# help=Include an integer list of model levels in python list format, e.g: [1,2,3,4]
+# type=python_list
+# compulsory=true
+# sort-key=modellevel2
+
+# [template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD]
+# ns=Diagnostics
+# description=Create plots for the specified model level fields.
+# help=See includes/generic_spatial_mlevel_model_field.cylc
+# type=python_boolean
+# compulsory=true
+# sort-key=modellevel3
+
+# [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES]
+# ns=Diagnostics
+# description=Domain averaged vertical profile for each validity time. Requires fields UM_MODEL_LEVELS and MODEL_LEVEL_MODEL_FIELDS as input.
+# help=See includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc
+# type=python_boolean
+# compulsory=true
+# sort-key=modellevel4
+
+
+# METplus.
+[template variables=METPLUS_POINT_STAT]
+ns=Diagnostics/Verification
+description=Run METplus point stat in the workflow.
+help=If True, it will enable the production of verification statistics against
+  point observations (i.e. land-synop, sonde,...).
+type=python_boolean
+trigger=template variables=METPLUS_OPT_CONFIG_KEYS: True;
+        template variables=METPLUS_OBS_DIR: True;
+compulsory=true
+sort-key=met1
+
+[template variables=METPLUS_GRID_STAT]
+ns=Diagnostics/Verification
+description=Run METplus grid stat in the workflow.
+help=If True, it will enable the production of verification statistics against
+  gridded data (i.e. radar, analysis,...).
+type=python_boolean
+trigger=template variables=METPLUS_OPT_CONFIG_KEYS: True;
+        template variables=METPLUS_ANA_DIR: True;
+compulsory=true
+sort-key=met2
+
+[template variables=METPLUS_OPT_CONFIG_KEYS]
+ns=Diagnostics/Verification
+description=Which METplus configuration to run.
+help=Selects the site/model specific configuration to use. The configuration must
+  exist as restricted files in
+  https://github.com/MetOffice/CSET-workflow/tree/main/app/{app_name}/file
+type=quoted
+value-hints="metoffice", "niwa"
+sort-key=met3
+
+[template variables=METPLUS_OBS_DIR]
+ns=Diagnostics/Verification
+description=Path to directory containing observations in MET ASCII format.
+type=quoted
+sort-key=met4
+
+[template variables=METPLUS_ANA_DIR]
+ns=Diagnostics/Verification
+description=Path to directory containing analysis in MET netcdf format.
+type=quoted
+sort-key=met5
+
+[template variables=DETERMINISTIC_PLOT_CAPE_RATIO]
+ns=Diagnostics/Convection
+description=Extracts data required for, and calculates the CAPE ratio diagnostic, plotting on a map.
+  Required STASH m01s20i114, m01s20i112, m01s20i113.
+help=See includes/deterministic_plot_cape_ratio.cylc
+type=python_boolean
+compulsory=true
+sort-key=cape1
+
+[template variables=EXTRACT_TRANSECT]
+ns=Diagnostics
+description=Plot a cross section for variables defined in CS_VARS.
+help=This functionality extracts data for each variable defined in CS_VARS,
+  and extracts a column along each point of the transect. The number of
+  points along the transect is determined by calculating the length of the
+  transect in degrees, and dividing this by the minimum grid spacing within the domain.
+  Additional dimensions (time, ensemble) are accepted and returned as a cross section.
+  This allows the user to compute slices through features of meteorological interest,
+  such as transects through valleys, or through a front as it passes through.
+type=python_boolean
+trigger=template variables=CS_STARTCOORDS: True;
+        template variables=CS_FINISHCOORDS: True;
+        template variables=CS_VARS: True;
+        template variables=CS_VERTLEV: True
+compulsory=true
+sort-key=cs1
+
+[template variables=CS_STARTCOORDS]
+ns=Diagnostics
+description=Start latitude, longitude of the cross section.
+help=The latitude, longitude coordinate with respect to the model grid where the
+  cross section will start, i.e. the furthest left-hand point of the plot (where the
+  x axis is distance along the transect, and the y axis is pressure level).
+type=real,real
+compulsory=true
+sort-key=cs2
+
+[template variables=CS_FINISHCOORDS]
+ns=Diagnostics
+description=Finish latitude, longitude of the cross section.
+help=The latitude, longitude coordinate with respect to the model grid where the
+  cross section will finish, i.e. the furthest right-hand point of the plot (where the
+  x axis is distance along the transect, and the y axis is pressure level).
+type=real,real
+compulsory=true
+sort-key=cs3
+
+[template variables=CS_VARS]
+ns=Diagnostics
+description=List of all variables to extract.
+help=A list of STASH codes or variable names to extract from the data, and compute
+  the transect over. It must contain multi-level data (i.e. be at least 3D with pressure,
+  latitude and longitude coordinates).
+type=python_list
+compulsory=true
+sort-key=cs4
+
+[template variables=CS_VERTLEV]
+ns=Diagnostics
+description=Name of vertical coordinate to use for transect.
+help=A string containing the vertical coordinate name to use for the transect. Typically
+  pressure or model_level_number; this is specified in case the source files contain
+  cubes where the variable name is the same but they are on different vertical grids,
+  and it would be ambiguous which cube the user wants to compute the transect on.
+type=quoted +compulsory=true +sort-key=cs5 + +[template variables=SURFACE_SINGLE_POINT_TIME_SERIES] +ns=Diagnostics +description=Plot a time series at a single specified location in a surface field. +help=Include values of desired longitude and latitude. +type=python_boolean +compulsory=true +trigger=template variables=LATITUDE_POINT: True; + template variables=LONGITUDE_POINT: True; + template variables=SINGLE_POINT_METHOD: True; +sort-key=point1 + +[template variables=LATITUDE_POINT] +ns=Diagnostics +description=Latitude of selected point. Note that this could be rotated or not, depending on the data provided. +help=The latitude must exist within the domain. Value should be a float: for example, -1.5. +type=real +compulsory=true +sort-key=point2 + +[template variables=LONGITUDE_POINT] +ns=Diagnostics +description=Longitude of selected point. Note that this could be rotated or not, depending on the data provided. +help=The longitude must exist within the domain. Value should be a float: for example, 0.8. +type=real +compulsory=true +sort-key=point2 + +[template variables=SINGLE_POINT_METHOD] +ns=Diagnostics +description=Method used to map model data onto selected gridpoints. +help=Method used to map model data onto selected gridpoints. These are regrid methods available in Iris. +values="Nearest", "Linear" +compulsory=true +sort-key=point2 + +[template variables=BASIC_QQ_PLOT] +ns=Diagnostics +description=Create a basic quantile-quantile plot for variables specified collapsing over specified coordinates. + Produces a single Q-Q plot and so requires collapsing across all coordinates in the file. +help=The quantiles defined for each variable are 0, 0.001, 0.01, + 0.1,1,5,10,15,20,25,30,40,50,60,70,75,80,85,90,95,99,99.9,99.99, + 99.999,100. + If more than one variable is supplied e.g. MODEL_FIELDS_A='a','b',...; + MODEL_FIELDS_B='c','d',...; the Q-Q plot will compare 'a' vs. 'c', + 'b' vs. 'd', etc. + It is expected that the length of MODEL_FIELDS_A is the same as + MODEL_FIELDS_B. +trigger=template variables=MODEL_FIELDS_A: True; + template variables=VERTICAL_COORDINATE_A: True; + template variables=LEVELS_A: True; + template variables=MODEL_FIELDS_B: True; + template variables=VERTICAL_COORDINATE_B: True; + template variables=LEVELS_B: True; + template variables=COORDINATE_LIST: True; + template variables=ONE_TO_ONE: True; +type=python_boolean +compulsory=true +sort-key=qq1 + +[template variables=MODEL_FIELDS_A] +ns=Diagnostics +description=List of standard names for model fields to plot on y axis. + Must be the same length as MODEL_FIELDS_B. +help=Include a list of variable names in python list format["var1","var2"]. +type=python_list +compulsory=true +sort-key=qq1a + +[template variables=VERTICAL_COORDINATE_A] +ns=Diagnostics +description=Level coordinate name about which to constrain. The length of this + list should be the same as MODEL_FIELDS A. +help=Include a list of coordinate names in python list format["var1","var2"]. + If single levels are required use any vertical coordinate in the cube and + make sure to put LEVELS_A as []. +type=python_list +compulsory=true +sort-key=qq1b + +[template variables=LEVELS_A] +ns=Diagnostics +description=A list of levels to constrain over. The length of this + list should be the same as MODEL_FIELDS A. +help=Use "*" for all levels, INTEGERS for specific levels, and [] for no levels + (e.g., single level fields). 
+type=python_list +compulsory=true +sort-key=qq1c + +[template variables=MODEL_FIELDS_B] +ns=Diagnostics +description=List of standard names for model fields to plot on x axis. + Must be the same length as MODEL_FIELDS_A. +help=Include a list of variable names in python list format["var1","var2"]. +type=python_list +compulsory=true +sort-key=qq1d + +[template variables=VERTICAL_COORDINATE_B] +ns=Diagnostics +description=Level coordinate name about which to constrain. The length of this + list should be the same as MODEL_FIELDS B. +help=Include a list of coordinate names in python list format["var1","var2"]. + If single levels are required use any vertical coordinate in the cube and + make sure to put LEVELS_B as []. +type=python_list +compulsory=true +sort-key=qq1e + +[template variables=LEVELS_B] +ns=Diagnostics +description=A list of levels to constrain over. The length of this + list should be the same as MODEL_FIELDS B. +help=Use "*" for all levels, INTEGERS for specific levels, and [] for no levels + (e.g., single level fields). +type=python_list +compulsory=true +sort-key=qq1f + +[template variables=COORDINATE_LIST] +ns=Diagnostics +description=List of coordinates to collapse the percentiles over. The coordinate list + must be of the form [\\"var1\\",\\"var2\\",...].The values should be + all of the coordinates in the cube to ensure the output is 1D with the + percentiles as the coordinate. +help=Include a list of coordinates in python list format[\\"var1\\",\\"var2\\"]. +type=quoted +compulsory=true +sort-key=qq1g + +[template variables=ONE_TO_ONE] +ns=Diagnostics +description=One-to-one line option for plots. +help=Set to True if you want a one-to-one line; false if not. +type=python_boolean +compulsory=true +sort-key=qq1h + +[template variables=SELECT_SUBAREA] +ns=Diagnostics +description=Only analyse a subset of the region defined by the input data. +help=Select this option to run a recipe over a defined latitude-longitude range. +trigger=template variables=SUBAREA_LAT_BOUND_TOP: True; + =template variables=SUBAREA_LAT_BOUND_BOTTOM: True; + =template variables=SUBAREA_LON_BOUND_LEFT: True; + =template variables=SUBAREA_LON_BOUND_RIGHT: True; +type=python_boolean +compulsory=true +sort-key=subsection1 + +[template variables=SUBAREA_LAT_BOUND_TOP] +ns=Diagnostics +description=Top edge coordinate of the sub-area, real. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. +type=real +sort-key=subsection2 +compulsory=true + +[template variables=SUBAREA_LAT_BOUND_BOTTOM] +ns=Diagnostics +description=Bottom edge coordinate of the sub-area, real. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. +type=real +sort-key=subsection2 +compulsory=true + +[template variables=SUBAREA_LON_BOUND_LEFT] +ns=Diagnostics +description=Left edge coordinate of the sub-area, real. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. +type=real +sort-key=subsection2 +compulsory=true + +[template variables=SUBAREA_LON_BOUND_RIGHT] +ns=Diagnostics +description=Right edge coordinate of the sub-area, real. +help=Recommend looking at the input data to get these values. Uses the grid’s native units. +type=real +sort-key=subsection2 +compulsory=true + +[template variables=DETERMINISTIC_PLOT_INFLOW_PROPERTIES] +ns=Diagnostics/Convection +description=Extracts data required for, and calculates the inflow properties diagnostic, plotting on a map. 
+ Required STASH m01s20i119, m01s00i025, m01s00i033. +help=See includes/deterministic_plot_inflow_properties.cylc +type=python_boolean +compulsory=true + +[template variables=DOMAIN_HISTOGRAM_SERIES] +ns=Diagnostics +description=Create a series of histogram plots for selected vertical levels for each cycle time. +help=See includes/deterministic_domain_histogram_series.cylc +trigger=template variables=HISTOGRAM_TYPE: True; +type=python_boolean +compulsory=true +sort-key=hist1 + +[template variables=HISTOGRAM_TYPE] +ns=Diagnostics +description=Type of histogram to plot. "step", "barstacked" or "stepfilled" +help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" +values="step","barstacked","stepfilled" +compulsory=true +sort-key=hist3 diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 144928a5e..da769f6a2 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -1,6 +1,106 @@ -# Workflow’s configuration metadata +# Diagnostics settings are split into a separate file. +import=meta/diagnostics + [template variables] +################################################################################ +# General: Software environment and general non-scientific setup. +################################################################################ + +[template variables=LOGLEVEL] +ns=General +title=Logging level +description=How detailed the logs should be. +help=INFO only shows a general overview of what is happening, while DEBUG + enables in-operator logging, but produces a lot of output. +values="INFO", "DEBUG" +value-titles=Info, Debug +compulsory=true + +[template variables=HOUSEKEEPING_MODE] +ns=General +title=Housekeeping mode +description=How much housekeeping deletes. +help=None will delete nothing. + + Debug will only delete the downloads of initial data, and will keep + intermediate files. + + Standard deletes most intermediate files, save for the final data that is + plotted. +values=0, 1, 2 +value-titles=None, Debug, Standard +compulsory=true + +[template variables=COLORBAR_FILE] +ns=General +description=Filepath and name for colorbar details of each variable i.e. + name_of_filepath/name_of_filename. An example file is available under + CSET/cset-workflow/extra-meta/colorbar_dict_alphabetical.json +help=TODO +type=quoted +compulsory=true + +[template variables=PLOT_RESOLUTION] +ns=General +description=Resolution of output plot in dpi. +help=This is passed through to the plotting operators and sets the resolution + of the output plots to the given number of pixels per inch. If unset + defaults to 100 dpi. The plots are all 8 by 8 inches, so this corresponds + to 800 by 800 pixels. +type=integer +compulsory=true + +[template variables=WEB_DIR] +ns=General +description=Path to directory that is served by the webserver. +help=This will probably be under $HOME/public_html or similar. You will want to + ensure a subfolder is used as multiple files will be written here. +type=quoted +compulsory=true +sort-key=web2 + +[template variables=WEB_ADDR] +ns=General +title=Website address +description=The address at which the website is served. +help=This should be the address where your public_html or equivalent is served. + It might include a partial path, such as your username. 
+ + E.g: https://example.com/~username/ +type=quoted +compulsory=true +sort-key=web1 + +[template variables=WEB_DIR] +ns=General +title=Web directory +description=Path to directory that is served by the webserver. +help=This will probably be under $HOME/public_html or similar. You will want to + ensure a subfolder is used as it will be replaced with a symbolic link. + + This is where the output of the workflow will be accessible from, through a + symlink to the workflow shared directory. Anything existing at the path will + be removed. + + E.g: $HOME/public_html/CSET +type=quoted +compulsory=true +sort-key=web2 + +[template variables=CSET_RUNAHEAD_LIMIT] +ns=General +title=Concurrent cycle limit +description=Number of simultaneous cycles. +help=The maximum number of cycles run in parallel. A larger number here will + finish quicker, but utilise more compute resources at once. For a large + enough workflow it may overwhelm the batch submission system, so it is + recommended to keep this below 10. +type=integer +range=0: +compulsory=true + + ################################################################################ # Environment ################################################################################ @@ -148,106 +248,10 @@ sort-key=met2 ################################################################################ -# General +# Models and Cases: Case and model settings. ################################################################################ -[template variables=LOGLEVEL] -ns=General -title=Logging level -description=How detailed the logs should be. -help=INFO only shows a general overview of what is happening, while DEBUG - enables in-operator logging, but produces a lot of output. -values="INFO", "DEBUG" -value-titles=Info, Debug -compulsory=true - -[template variables=HOUSEKEEPING_MODE] -ns=General -title=Housekeeping mode -description=How much housekeeping deletes. -help=None will delete nothing. - - Debug will only delete the downloads of initial data, and will keep - intermediate files. - - Standard deletes most intermediate files, save for the final data that is - plotted. -values=0, 1, 2 -value-titles=None, Debug, Standard -compulsory=true - -[template variables=COLORBAR_FILE] -ns=General -description=Filepath and name for colorbar details of each variable i.e. - name_of_filepath/name_of_filename. An example file is available under - CSET/cset-workflow/extra-meta/colorbar_dict_alphabetical.json -help=TODO -type=quoted -compulsory=true - -[template variables=PLOT_RESOLUTION] -ns=General -description=Resolution of output plot in dpi. -help=This is passed through to the plotting operators and sets the resolution - of the output plots to the given number of pixels per inch. If unset - defaults to 100 dpi. The plots are all 8 by 8 inches, so this corresponds - to 800 by 800 pixels. -type=integer -compulsory=true - -[template variables=WEB_DIR] -ns=General -description=Path to directory that is served by the webserver. -help=This will probably be under $HOME/public_html or similar. You will want to - ensure a subfolder is used as multiple files will be written here. -type=quoted -compulsory=true -sort-key=web2 - -[template variables=WEB_ADDR] -ns=General -title=Website address -description=The address at which the website is served. -help=This should be the address where your public_html or equivalent is served. - It might include a partial path, such as your username. 
- - E.g: https://example.com/~username/ -type=quoted -compulsory=true -sort-key=web1 - -[template variables=WEB_DIR] -ns=General -title=Web directory -description=Path to directory that is served by the webserver. -help=This will probably be under $HOME/public_html or similar. You will want to - ensure a subfolder is used as it will be replaced with a symbolic link. - - This is where the output of the workflow will be accessible from, through a - symlink to the workflow shared directory. Anything existing at the path will - be removed. - - E.g: $HOME/public_html/CSET -type=quoted -compulsory=true -sort-key=web2 - -[template variables=CSET_RUNAHEAD_LIMIT] -ns=General -title=Concurrent cycle limit -description=Number of simultaneous cycles. -help=The maximum number of cycles run in parallel. A larger number here will - finish quicker, but utilise more compute resources at once. For a large - enough workflow it may overwhelm the batch submission system, so it is - recommended to keep this below 10. -type=integer -range=0: -compulsory=true - - -################################################################################ -# Models and Cases -################################################################################ +[template variables] [template variables=CSET_CYCLING_MODE] ns=Models and Cases @@ -1568,458 +1572,3 @@ help=The preprocessing recipe to use. type=quoted compulsory=true sort-key=c2 - - -################################################################################ -# Diagnostics -################################################################################ - -[template variables=SURFACE_MODEL_FIELDS] -ns=Diagnostics/Quicklook -title=Surface model fields -description=Per model field names. -help=Variable names for surface variables. The names across a row should match - the same physical phenomenon, and use the appropriate standard, long, or - field name, or the STASH code for each model. Blank entries indicate that a - model does not have that phenomenon, so it will be skipped. - - Ignore the boxes for models that are not enabled. -compulsory=true -element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 -type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted -length=: -sort-key=surface1 - -[template variables=PLOT_SPATIAL_SURFACE_MODEL_FIELD] -ns=Diagnostics/Quicklook -description=Create plots for the specified surface fields. -help=See includes/plot_spatial_surface_model_field.cylc -type=python_boolean -compulsory=true -sort-key=surface2 - -[template variables=DOMAIN_MEAN_SURFACE_TIME_SERIES] -ns=Diagnostics/Quicklook -description=Create time series plot of surface field domain mean. -help=See includes/deterministic_domain_mean_surface_time_series.cylc -type=python_boolean -compulsory=true -sort-key=surface3 - -[template variables=METPLUS_POINT_STAT] -ns=Diagnostics/Verification -description=Run METplus point stat in the workflow. -help=If True, it will enable the production of verification statistics against - point observations (i.e. land-synop, sonde,...). -type=python_boolean -trigger=template variables=METPLUS_OPT_CONFIG_KEYS: True; - template variables=METPLUS_OBS_DIR: True; -compulsory=true -sort-key=met1 - -[template variables=METPLUS_GRID_STAT] -ns=Diagnostics/Verification -description=Run METplus grid stat in the workflow. -help=If True, it will enable the production of verification statistics against - gridded data (i.e. radar, analysis,...). 
-type=python_boolean -trigger=template variables=METPLUS_OPT_CONFIG_KEYS: True; - template variables=METPLUS_ANA_DIR: True; -compulsory=true -sort-key=met2 - -[template variables=METPLUS_OPT_CONFIG_KEYS] -ns=Diagnostics/Verification -description=Which METplus configuration to run. -help=Selects the site/model specific configuration to use. The configuration must - exist as restricted files in - https://github.com/MetOffice/CSET-workflow/tree/main/app/{app_name}/file -type=quoted -value-hints="metoffice", "niwa" -sort-key=met3 - -[template variables=METPLUS_OBS_DIR] -ns=Diagnostics/Verification -description=Path to directory containing observations in MET ASCII format. -type=quoted -sort-key=met4 - -[template variables=METPLUS_ANA_DIR] -ns=Diagnostics/Verification -description=Path to directory containing analysis in MET netcdf format. -type=quoted -sort-key=met5 - -[template variables=DETERMINISTIC_PLOT_CAPE_RATIO] -ns=Diagnostics/Convection -description=Extracts data required for, and calculates the CAPE ratio diagnostic, plotting on a map. - Required STASH m01s20i114, m01s20i112, m01s20i113. -help=See includes/deterministic_plot_cape_ratio.cylc -type=python_boolean -compulsory=true -sort-key=cape1 - -[template variables=EXTRACT_TRANSECT] -ns=Diagnostics -description=Plot a cross section for variables defined in CS_VARS. -help=This functionality extracts data for each variable defined in CS_VARS, - and extracts a column along each point of the transect. The number of - points along the transect is determined by calculating the length of the - transect in degrees, and dividing this by the minimum grid spacing within the domain. - Additional dimensions (time, ensemble) are accepted and returned as a cross section. - This allows the user to compute slices through features of meteorological interest, - such as transects through valleys, or through a front as it passes through. -type=python_boolean -trigger=template variables=CS_STARTCOORDS: True; - template variables=CS_FINISHCOORDS: True; - template variables=CS_VARS: True; - template variables=CS_VERTLEV: True -compulsory=true -sort-key=cs1 - -[template variables=CS_STARTCOORDS] -ns=Diagnostics -description=Start latitude, longitude of the cross section. -help=The latitude, longitude coordinate with respect to the model grid where the - cross section will start i.e. the furthest left hand point of the plot, where the x - axis is distance along transect, and y axis is pressure level). -type=real,real -compulsory=true -sort-key=cs2 - -[template variables=CS_FINISHCOORDS] -ns=Diagnostics -description=Finish latitude, longitude of the cross section. -help=The latitude, longitude coordinate with respect to the model grid where the - cross section will finish i.e. the furthest right hand point of the plot, where the x - axis is distance along transect, and y axis is pressure level). -type=real,real -compulsory=true -sort-key=cs3 - -[template variables=CS_VARS] -ns=Diagnostics -description=List of all variables to extract. -help=A list of stash codes or variable names to extract from the data, and compute - the transect over. It must contain multi-level data (i.e. be at least 3D with pressure, - latitude and longitude coordinates). -type=python_list -compulsory=true -sort-key=cs4 - -[template variables=CS_VERTLEV] -ns=Diagnostics -description=Name of vertical coordinate to use for transect. -help=A string, containing the vertical coordinate name to use for the transect. 
Typically - pressure or model_level_number, this is specified in case the source files contain - cubes where the variable name is the same, but they are on different vertical grids, - and it would be ambiguous which cube the user wants to compute the transect on. -type=quoted -compulsory=true -sort-key=cs5 - -[template variables=DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD] -ns=Diagnostics -description=Create a series of histogram plots for selected surface fields for each cycle time. -help=See includes/deterministic_domain_surface_histogram_series.cylc. This diagnostic requires the user to also enable and enter the SURFACE_MODEL_FIELDS. The series_coordinate in the recipe is set to "time", but can be switched to any other coordinate. -type=python_boolean -compulsory=true -sort-key=surface2 - -[template variables=DOMAIN_MEAN_VERTICAL_PROFILE_SERIES] -ns=Diagnostics -description=Domain averaged vertical profile for each validity time. -help=See includes/deterministic_domain_mean_vertical_profile_series.cylc -type=python_boolean -compulsory=true -sort-key=surface1 - -[template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES] -ns=Diagnostics -description=Domain averaged vertical profile for each validity time. Requires fields UM_MODEL_LEVELS and MODEL_LEVEL_MODEL_FIELDS as input. -help=See includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc -type=python_boolean -compulsory=true -sort-key=surface1 - -[template variables=PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD] -ns=Diagnostics -description=Create plots for the specified pressure level fields. -help=See includes/generic_spatial_plevel_model_field.cylc -type=python_boolean -trigger=template variables=PRESSURE_LEVEL_MODEL_FIELDS: True; - template variables=PRESSURE_LEVELS: True; -compulsory=true -sort-key=pressure1 - -[template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD] -ns=Diagnostics -description=Create plots for the specified model level fields. -help=See includes/generic_spatial_mlevel_model_field.cylc -type=python_boolean -trigger=template variables=MODEL_LEVEL_MODEL_FIELDS: True; - template variables=UM_MODEL_LEVELS: True; -compulsory=true -sort-key=pressure1 - -[template variables=PRESSURE_LEVEL_MODEL_FIELDS] -ns=Diagnostics -description=List of standard names of model fields on pressure levels to plot. -help=Include a list of variable names in python list format["var1","var2"]. -type=python_list -compulsory=true -sort-key=pressure2 - -[template variables=MODEL_LEVEL_MODEL_FIELDS] -ns=Diagnostics -description=List of standard names of UM model fields on model levels to plot. -help=Include a list of variable names in python list format, e.g: ["var1","var2"] -type=python_list -compulsory=true -sort-key=pressure2 - -[template variables=PRESSURE_LEVELS] -ns=Diagnostics -description=List of pressure levels to generate plots for. -help=Include an INTEGER list of pressure levels in hPa in python list format[1000,850]. -type=python_list -compulsory=true -sort-key=pressure2 - -[template variables=UM_MODEL_LEVELS] -ns=Diagnostics -description=List of UM model levels to generate plots for. -help=Include an integer list of model levels in python list format, e.g: [1,2,3,4] -type=python_list -compulsory=true -sort-key=pressure2 - -[template variables=STASH_CODES] -ns=Diagnostics -description=List of STASH codes to plot. -type=python_list -compulsory=true -sort-key=stash1 - -[template variables=PLOT_SPATIAL_STASH_FIELD] -ns=Diagnostics -description=Create plots for the specified STASH fields. 
-help=See includes/plot_spatial_stash_field.cylc -type=python_boolean -compulsory=true -sort-key=stash2 - -[template variables=DOMAIN_MEAN_TIME_SERIES_STASH] -ns=Diagnostics -description=Create time series plot of the STASH fields domain mean. -help=See includes/domain_mean_time_series_stash.cylc -type=python_boolean -compulsory=true -sort-key=stash2 - -[template variables=SURFACE_SINGLE_POINT_TIME_SERIES] -ns=Diagnostics -description=Plot a time series at a single specified location in a surface field. -help=Include values of desired longitude and latitude. -type=python_boolean -compulsory=true -trigger=template variables=LATITUDE_POINT: True; - template variables=LONGITUDE_POINT: True; - template variables=SINGLE_POINT_METHOD: True; -sort-key=point1 - -[template variables=LATITUDE_POINT] -ns=Diagnostics -description=Latitude of selected point. Note that this could be rotated or not, depending on the data provided. -help=The latitude must exist within the domain. Value should be a float: for example, -1.5. -type=real -compulsory=true -sort-key=point2 - -[template variables=LONGITUDE_POINT] -ns=Diagnostics -description=Longitude of selected point. Note that this could be rotated or not, depending on the data provided. -help=The longitude must exist within the domain. Value should be a float: for example, 0.8. -type=real -compulsory=true -sort-key=point2 - -[template variables=SINGLE_POINT_METHOD] -ns=Diagnostics -description=Method used to map model data onto selected gridpoints. -help=Method used to map model data onto selected gridpoints. These are regrid methods available in Iris. -values="Nearest", "Linear" -compulsory=true -sort-key=point2 - -[template variables=BASIC_QQ_PLOT] -ns=Diagnostics -description=Create a basic quantile-quantile plot for variables specified collapsing over specified coordinates. - Produces a single Q-Q plot and so requires collapsing across all coordinates in the file. -help=The quantiles defined for each variable are 0, 0.001, 0.01, - 0.1,1,5,10,15,20,25,30,40,50,60,70,75,80,85,90,95,99,99.9,99.99, - 99.999,100. - If more than one variable is supplied e.g. MODEL_FIELDS_A='a','b',...; - MODEL_FIELDS_B='c','d',...; the Q-Q plot will compare 'a' vs. 'c', - 'b' vs. 'd', etc. - It is expected that the length of MODEL_FIELDS_A is the same as - MODEL_FIELDS_B. -trigger=template variables=MODEL_FIELDS_A: True; - template variables=VERTICAL_COORDINATE_A: True; - template variables=LEVELS_A: True; - template variables=MODEL_FIELDS_B: True; - template variables=VERTICAL_COORDINATE_B: True; - template variables=LEVELS_B: True; - template variables=COORDINATE_LIST: True; - template variables=ONE_TO_ONE: True; -type=python_boolean -compulsory=true -sort-key=qq1 - -[template variables=MODEL_FIELDS_A] -ns=Diagnostics -description=List of standard names for model fields to plot on y axis. - Must be the same length as MODEL_FIELDS_B. -help=Include a list of variable names in python list format["var1","var2"]. -type=python_list -compulsory=true -sort-key=qq1a - -[template variables=VERTICAL_COORDINATE_A] -ns=Diagnostics -description=Level coordinate name about which to constrain. The length of this - list should be the same as MODEL_FIELDS A. -help=Include a list of coordinate names in python list format["var1","var2"]. - If single levels are required use any vertical coordinate in the cube and - make sure to put LEVELS_A as []. -type=python_list -compulsory=true -sort-key=qq1b - -[template variables=LEVELS_A] -ns=Diagnostics -description=A list of levels to constrain over. 
The length of this - list should be the same as MODEL_FIELDS A. -help=Use "*" for all levels, INTEGERS for specific levels, and [] for no levels - (e.g., single level fields). -type=python_list -compulsory=true -sort-key=qq1c - -[template variables=MODEL_FIELDS_B] -ns=Diagnostics -description=List of standard names for model fields to plot on x axis. - Must be the same length as MODEL_FIELDS_A. -help=Include a list of variable names in python list format["var1","var2"]. -type=python_list -compulsory=true -sort-key=qq1d - -[template variables=VERTICAL_COORDINATE_B] -ns=Diagnostics -description=Level coordinate name about which to constrain. The length of this - list should be the same as MODEL_FIELDS B. -help=Include a list of coordinate names in python list format["var1","var2"]. - If single levels are required use any vertical coordinate in the cube and - make sure to put LEVELS_B as []. -type=python_list -compulsory=true -sort-key=qq1e - -[template variables=LEVELS_B] -ns=Diagnostics -description=A list of levels to constrain over. The length of this - list should be the same as MODEL_FIELDS B. -help=Use "*" for all levels, INTEGERS for specific levels, and [] for no levels - (e.g., single level fields). -type=python_list -compulsory=true -sort-key=qq1f - -[template variables=COORDINATE_LIST] -ns=Diagnostics -description=List of coordinates to collapse the percentiles over. The coordinate list - must be of the form [\\"var1\\",\\"var2\\",...].The values should be - all of the coordinates in the cube to ensure the output is 1D with the - percentiles as the coordinate. -help=Include a list of coordinates in python list format[\\"var1\\",\\"var2\\"]. -type=quoted -compulsory=true -sort-key=qq1g - -[template variables=ONE_TO_ONE] -ns=Diagnostics -description=One-to-one line option for plots. -help=Set to True if you want a one-to-one line; false if not. -type=python_boolean -compulsory=true -sort-key=qq1h - -[template variables=SELECT_SUBAREA] -ns=Diagnostics -description=Only analyse a subset of the region defined by the input data. -help=Select this option to run a recipe over a defined latitude-longitude range. -trigger=template variables=SUBAREA_LAT_BOUND_TOP: True; - =template variables=SUBAREA_LAT_BOUND_BOTTOM: True; - =template variables=SUBAREA_LON_BOUND_LEFT: True; - =template variables=SUBAREA_LON_BOUND_RIGHT: True; -type=python_boolean -compulsory=true -sort-key=subsection1 - -[template variables=SUBAREA_LAT_BOUND_TOP] -ns=Diagnostics -description=Top edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid’s native units. -type=real -sort-key=subsection2 -compulsory=true - -[template variables=SUBAREA_LAT_BOUND_BOTTOM] -ns=Diagnostics -description=Bottom edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid’s native units. -type=real -sort-key=subsection2 -compulsory=true - -[template variables=SUBAREA_LON_BOUND_LEFT] -ns=Diagnostics -description=Left edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid’s native units. -type=real -sort-key=subsection2 -compulsory=true - -[template variables=SUBAREA_LON_BOUND_RIGHT] -ns=Diagnostics -description=Right edge coordinate of the sub-area, real. -help=Recommend looking at the input data to get these values. Uses the grid’s native units. 
-type=real -sort-key=subsection2 -compulsory=true - -[template variables=DETERMINISTIC_PLOT_INFLOW_PROPERTIES] -ns=Diagnostics/Convection -description=Extracts data required for, and calculates the inflow properties diagnostic, plotting on a map. - Required STASH m01s20i119, m01s00i025, m01s00i033. -help=See includes/deterministic_plot_inflow_properties.cylc -type=python_boolean -compulsory=true - -[template variables=DOMAIN_HISTOGRAM_SERIES] -ns=Diagnostics -description=Create a series of histogram plots for selected vertical levels for each cycle time. -help=See includes/deterministic_domain_histogram_series.cylc -trigger=template variables=HISTOGRAM_TYPE: True; -type=python_boolean -compulsory=true -sort-key=hist1 - -[template variables=HISTOGRAM_TYPE] -ns=Diagnostics -description=Type of histogram to plot. "step", "barstacked" or "stepfilled" -help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" -values="step","barstacked","stepfilled" -compulsory=true -sort-key=hist3 diff --git a/src/CSET/operators/constraints.py b/src/CSET/operators/constraints.py index da95731a9..5c7cacb3b 100644 --- a/src/CSET/operators/constraints.py +++ b/src/CSET/operators/constraints.py @@ -92,7 +92,7 @@ def generate_level_constraint( ------- constraint: iris.Constraint """ - # If astericks, then return all levels for given coordinate. + # If asterisks, then return all levels for given coordinate. if levels == "*": return iris.Constraint(**{coordinate: lambda cell: True}) else: From d2166353011fa9252357a47436d72d0b3ebba021 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 14:29:19 +0100 Subject: [PATCH 20/90] Convert pressure-level recipes --- ...c_domain_mean_vertical_profile_series.cylc | 20 ++++++++-------- .../plot_spatial_plevel_model_field.cylc | 23 +++++++++--------- ...c_domain_mean_vertical_profile_series.yaml | 24 ++++--------------- .../generic_plevel_spatial_plot_sequence.yaml | 18 +++----------- 4 files changed, 30 insertions(+), 55 deletions(-) diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc index 2faadc684..2c80bb248 100644 --- a/cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc @@ -1,16 +1,16 @@ {% if DOMAIN_MEAN_VERTICAL_PROFILE_SERIES %} +{% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} [runtime] -{% for model_field in PRESSURE_LEVEL_MODEL_FIELDS %} - [[pre_process_domain_mean_vertical_profile_series_{{model_field}}]] - inherit = PARALLEL + [[generic_domain_mean_vertical_profile_series_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_domain_mean_vertical_profile_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --PLEVEL='{{PRESSURE_LEVELS}}'" - - [[collate_domain_mean_vertical_profile_series_{{model_field}}]] - inherit = COLLATE - [[[environment]]] - CSET_RECIPE_NAME = "generic_domain_mean_vertical_profile_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --PLEVEL='{{PRESSURE_LEVELS}}'" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endif %} diff --git 
a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc index 38eec0aed..83a13d391 100644 --- a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc @@ -1,18 +1,19 @@ {% if PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD %} -[runtime] -{% for model_field in PRESSURE_LEVEL_MODEL_FIELDS %} +{% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} {% for plevel in PRESSURE_LEVELS %} - [[process_generic_plevel_spatial_plot_sequence_{{model_field}}_{{plevel}}]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "generic_plevel_spatial_plot_sequence.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}} --PLEVEL={{plevel}}" - - [[collate_generic_plevel_spatial_plot_sequence_{{model_field}}_{{plevel}}]] - inherit = COLLATE +[runtime] + [[generic_plevel_spatial_plot_sequence_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_plevel_spatial_plot_sequence.yaml" - CSET_ADDOPTS = "--VARNAME={{model_field}} --PLEVEL={{plevel}}" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --PLEVEL='{{plevel}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endfor %} {% endif %} diff --git a/src/CSET/recipes/generic_domain_mean_vertical_profile_series.yaml b/src/CSET/recipes/generic_domain_mean_vertical_profile_series.yaml index 13245cd24..d65198313 100644 --- a/src/CSET/recipes/generic_domain_mean_vertical_profile_series.yaml +++ b/src/CSET/recipes/generic_domain_mean_vertical_profile_series.yaml @@ -1,9 +1,8 @@ category: Profiles -title: Domain horizontal mean $VARNAME vertical profile as series +title: $MODEL_NAME Domain horizontal mean $VARNAME vertical profile as series description: Plots a time series of vertical profiles for the domain mean $VARNAME using a log pressure coordinate. -# Pre-processing steps. -parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints @@ -12,27 +11,14 @@ parallel: varname: $VARNAME pressure_level_constraint: operator: constraints.generate_level_constraint - coordinate: "pressure" - levels: $PLEVEL - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME + coordinate: pressure + levels: "*" - operator: collapse.collapse coordinate: [grid_latitude, grid_longitude] method: MEAN - # Save domain meaned variable to a file per validity time. - - operator: write.write_cube_to_nc - filename: intermediate/pressure_level_domain_mean - -# Collation steps. -# Reads in intermediate cube and plots it. -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - -# plot the vertical line series + # Plot the vertical line series. 
- operator: plot.plot_vertical_line_series series_coordinate: pressure sequence_coordinate: time diff --git a/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml b/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml index 28e388077..08a835088 100644 --- a/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml +++ b/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml @@ -1,7 +1,7 @@ category: Pressure Level Spatial -title: $VARNAME $PLEVEL Level Spatial Plot +title: $MODEL_NAME $VARNAME $PLEVEL Level Spatial Plot description: | - Extracts and plots the $PLEVELNAME from a file at pressure level $PLEVEL. + Extracts and plots the $PLEVELNAME from $MODEL_NAME at pressure level $PLEVEL. parallel: - operator: read.read_cube @@ -10,25 +10,13 @@ parallel: variable_constraint: operator: constraints.generate_var_constraint varname: $VARNAME - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME pressure_level_constraint: operator: constraints.generate_level_constraint - coordinate: "pressure" + coordinate: pressure levels: $PLEVEL - - operator: write.write_cube_to_nc - - operator: plot.spatial_contour_plot sequence_coordinate: time - - operator: write.write_cube_to_nc - filename: intermediate/pressure_level_field - -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - - operator: write.write_cube_to_nc overwrite: True From 3855fd7e297eda1f518202b06e45db2f3aa64153 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 15:12:08 +0100 Subject: [PATCH 21/90] Squeeze scalar coords, and explain LFRic callbacks --- src/CSET/operators/read.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/CSET/operators/read.py b/src/CSET/operators/read.py index a43fef8d5..41b7949c2 100644 --- a/src/CSET/operators/read.py +++ b/src/CSET/operators/read.py @@ -253,6 +253,7 @@ def _lfric_normalise_callback(cube: iris.cube.Cube, field, filename): # Remove unwanted attributes. cube.attributes.pop("timeStamp", None) cube.attributes.pop("uuid", None) + # There might also be a "name" attribute to ditch, which is the filename. # Sort STASH code list. stash_list = cube.attributes.get("um_stash_source") @@ -268,6 +269,8 @@ def _lfric_time_coord_fix_callback(cube: iris.cube.Cube, field, filename): issue, though the coordinate satisfies all the properties for a DimCoord. Scalar time values are left as AuxCoords. """ + # This issue seems to come from iris's handling of NetCDF files where time + # always ends up as an AuxCoord. if cube.coords("time"): time_coord = cube.coord("time") if not isinstance(time_coord, iris.coords.DimCoord) and cube.coord_dims( ): iris.util.promote_aux_coord_to_dim_coord(cube, time_coord) + # Force single-valued coordinates to be scalar coordinates. + return iris.util.squeeze(cube) + def _check_input_files(input_path: Path | str, filename_pattern: str) -> Iterable[Path]: """Get an iterable of files to load, and check that they all exist. 
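The squeeze behaviour this patch relies on is easiest to see in isolation. The following is a minimal sketch, not part of the patch itself, assuming only that iris and numpy are installed: squeezing removes length-one dimensions while keeping their coordinates attached as scalar coordinates, which is why the callback can safely return iris.util.squeeze(cube) for single-valued time or ensemble dimensions.

    import iris.coords
    import iris.cube
    import iris.util
    import numpy as np

    # A cube with a single-valued "realization" dimension, similar to what a
    # deterministic forecast read through ensemble-aware tooling produces.
    realization = iris.coords.DimCoord([0], standard_name="realization")
    cube = iris.cube.Cube(
        np.zeros((1, 3)),
        long_name="air_temperature",
        dim_coords_and_dims=[(realization, 0)],
    )
    squeezed = iris.util.squeeze(cube)
    assert squeezed.ndim == 1  # The length-one dimension is gone...
    assert squeezed.coords("realization")  # ...but remains as a scalar coordinate.

Downstream operators then see consistent cube shapes whether or not the input carried a degenerate ensemble or time dimension.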
From 9f7ad7600d7c6715b4d17b1fa0418570ebbbea06 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 15:51:49 +0100 Subject: [PATCH 22/90] Convert more recipes and include files --- ...deterministic_domain_histogram_series.cylc | 19 +++++++++++ ...n_vertical_model_level_profile_series.cylc | 16 +++++++++ ...ean_vertical_pressure_profile_series.cylc} | 4 +-- ...an_vertical_profile_modellevel_series.cylc | 16 --------- .../includes/domain_histogram_series.cylc | 18 ---------- .../plot_spatial_mlevel_model_field.cylc | 24 ++++++------- .../plot_spatial_plevel_model_field.cylc | 2 +- cset-workflow/meta/diagnostics/rose-meta.conf | 34 +++++++++---------- ..._domain_mean_vertical_profile_series.yaml} | 24 +++---------- .../generic_mlevel_spatial_plot_sequence.yaml | 14 ++------ ..._domain_mean_vertical_profile_series.yaml} | 0 ...l => generic_plevel_histogram_series.yaml} | 27 +++++---------- 12 files changed, 82 insertions(+), 116 deletions(-) create mode 100644 cset-workflow/includes/deterministic_domain_histogram_series.cylc create mode 100644 cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc rename cset-workflow/includes/{deterministic_domain_mean_vertical_profile_series.cylc => deterministic_domain_mean_vertical_pressure_profile_series.cylc} (70%) delete mode 100644 cset-workflow/includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc delete mode 100644 cset-workflow/includes/domain_histogram_series.cylc rename src/CSET/recipes/{generic_domain_mean_vertical_profile_modellevel_series.yaml => generic_mlevel_domain_mean_vertical_profile_series.yaml} (55%) rename src/CSET/recipes/{generic_domain_mean_vertical_profile_series.yaml => generic_plevel_domain_mean_vertical_profile_series.yaml} (100%) rename src/CSET/recipes/{generic_histogram_series.yaml => generic_plevel_histogram_series.yaml} (69%) diff --git a/cset-workflow/includes/deterministic_domain_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_histogram_series.cylc new file mode 100644 index 000000000..029159ad0 --- /dev/null +++ b/cset-workflow/includes/deterministic_domain_histogram_series.cylc @@ -0,0 +1,19 @@ +{% if DOMAIN_HISTOGRAM_SERIES %} +{% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} +{% for plevel in PRESSURE_LEVELS %} +[runtime] + [[generic_plevel_histogram_series_m{{model_number}}_{{sanitise_task_name(field)}}_lv{{sanitise_task_name(plevel)}}]] + inherit = PROCESS + [[[environment]]] + CSET_RECIPE_NAME = "generic_plevel_histogram_series.yaml" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --PLEVEL='{{plevel}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} +{% endfor %} +{% endfor %} +{% endif %} diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc new file mode 100644 index 000000000..9489f97c6 --- /dev/null +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc @@ -0,0 +1,16 @@ +{% if DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES %} +{% for equivalent_field in restructure_field_list(MODEL_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} +[runtime] + 
[[generic_mlevel_domain_mean_vertical_profile_series_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS + [[[environment]]] + CSET_RECIPE_NAME = "generic_mlevel_domain_mean_vertical_profile_series.yaml" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} +{% endfor %} +{% endif %} diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc similarity index 70% rename from cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc rename to cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc index 2c80bb248..039425364 100644 --- a/cset-workflow/includes/deterministic_domain_mean_vertical_profile_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc @@ -2,10 +2,10 @@ {% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] - [[generic_domain_mean_vertical_profile_series_m{{model_number}}_{{sanitise_task_name(field)}}]] + [[generic_plevel_domain_mean_vertical_profile_series_m{{model_number}}_{{sanitise_task_name(field)}}]] inherit = PROCESS [[[environment]]] - CSET_RECIPE_NAME = "generic_domain_mean_vertical_profile_series.yaml" + CSET_RECIPE_NAME = "generic_plevel_domain_mean_vertical_profile_series.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' --MODEL_NAME='{{models[model_number-1]["name"]}}' diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc deleted file mode 100644 index 6ba0078a3..000000000 --- a/cset-workflow/includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc +++ /dev/null @@ -1,16 +0,0 @@ -{% if DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES %} -[runtime] -{% for model_field in MODEL_LEVEL_MODEL_FIELDS %} - [[pre_process_domain_mean_vertical_profile_series_{{model_field}}]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "generic_domain_mean_vertical_profile_modellevel_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --MLEVEL='{{UM_MODEL_LEVELS}}'" - - [[collate_domain_mean_vertical_profile_series_{{model_field}}]] - inherit = COLLATE - [[[environment]]] - CSET_RECIPE_NAME = 
"generic_histogram_series.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --MLEVEL='{{mlevel}}' --HISTTYPE='{{HISTOGRAM_TYPE}}'" -{% endfor %} -{% endfor %} -{% endif %} diff --git a/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc b/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc index af726eb12..a4c5cb2ab 100644 --- a/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc @@ -1,19 +1,19 @@ {% if PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD %} -[runtime] -{% for model_field in MODEL_LEVEL_MODEL_FIELDS %} +{% for equivalent_field in restructure_field_list(MODEL_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} {% for mlevel in UM_MODEL_LEVELS %} - {% set formatted_level = mlevel|string|replace('.', 'p') %} - [[process_generic_mlevel_spatial_plot_sequence_{{model_field}}_{{formatted_level}}]] - inherit = PARALLEL - [[[environment]]] - CSET_RECIPE_NAME = "generic_mlevel_spatial_plot_sequence.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --MLEVEL='{{mlevel}}'" - - [[collate_generic_mlevel_spatial_plot_sequence_{{model_field}}_{{formatted_level}}]] - inherit = COLLATE +[runtime] + [[generic_mlevel_spatial_plot_sequence_m{{model_number}}_{{sanitise_task_name(field)}}_lv{{sanitise_task_name(mlevel)}}]] + inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_mlevel_spatial_plot_sequence.yaml" - CSET_ADDOPTS = "--VARNAME='{{model_field}}' --MLEVEL='{{mlevel}}'" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MLEVEL='{{mlevel}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} {% endfor %} {% endfor %} {% endif %} diff --git a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc index 83a13d391..cc98bf99b 100644 --- a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc @@ -3,7 +3,7 @@ {% for model_number, field in equivalent_field.items() %} {% for plevel in PRESSURE_LEVELS %} [runtime] - [[generic_plevel_spatial_plot_sequence_m{{model_number}}_{{sanitise_task_name(field)}}]] + [[generic_plevel_spatial_plot_sequence_m{{model_number}}_{{sanitise_task_name(field)}}_lv{{sanitise_task_name(plevel)}}]] inherit = PROCESS [[[environment]]] CSET_RECIPE_NAME = "generic_plevel_spatial_plot_sequence.yaml" diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 8b3d47adb..d81f7e3e0 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -86,6 +86,23 @@ type=python_boolean compulsory=true sort-key=pressure3 +[template variables=DOMAIN_HISTOGRAM_SERIES] +ns=Diagnostics +description=Create a series of histogram plots for selected pressure levels for each cycle time. +help=See includes/deterministic_domain_histogram_series.cylc +trigger=template variables=HISTOGRAM_TYPE: True; +type=python_boolean +compulsory=true +sort-key=pressure4a +[template variables=HISTOGRAM_TYPE] +ns=Diagnostics +description=Type of histogram to plot. "step", "barstacked" or "stepfilled" +help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" +values="step","barstacked","stepfilled" +compulsory=true +sort-key=pressure4b # Model-level fields. # Temporarily disabled while we figure out LFRic equivalent. 
@@ -432,20 +449,3 @@ description=Extracts data required for, and calculates the inflow properties dia help=See includes/deterministic_plot_inflow_properties.cylc type=python_boolean compulsory=true - -[template variables=DOMAIN_HISTOGRAM_SERIES] -ns=Diagnostics -description=Create a series of histogram plots for selected vertical levels for each cycle time. -help=See includes/deterministic_domain_histogram_series.cylc -trigger=template variables=HISTOGRAM_TYPE: True; -type=python_boolean -compulsory=true -sort-key=hist1 - -[template variables=HISTOGRAM_TYPE] -ns=Diagnostics -description=Type of histogram to plot. "step", "barstacked" or "stepfilled" -help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" -values="step","barstacked","stepfilled" -compulsory=true -sort-key=hist3 diff --git a/src/CSET/recipes/generic_domain_mean_vertical_profile_modellevel_series.yaml b/src/CSET/recipes/generic_mlevel_domain_mean_vertical_profile_series.yaml similarity index 55% rename from src/CSET/recipes/generic_domain_mean_vertical_profile_modellevel_series.yaml rename to src/CSET/recipes/generic_mlevel_domain_mean_vertical_profile_series.yaml index abd35ec4b..6beb6c891 100644 --- a/src/CSET/recipes/generic_domain_mean_vertical_profile_modellevel_series.yaml +++ b/src/CSET/recipes/generic_mlevel_domain_mean_vertical_profile_series.yaml @@ -1,9 +1,8 @@ category: Profiles -title: Domain mean $VARNAME vertical profile as series +title: $MODEL_NAME Domain horizontal mean $VARNAME vertical profile as series description: Plots a time series of vertical profiles for the domain mean $VARNAME using a `model_level_number` coordinate. -# Pre-processing steps. -parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints variable_constraint: operator: constraints.generate_var_constraint varname: $VARNAME pressure_level_constraint: operator: constraints.generate_level_constraint - coordinate: "model_level_number" - levels: $MLEVEL - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME + coordinate: model_level_number + levels: "*" - operator: collapse.collapse coordinate: [grid_latitude, grid_longitude] method: MEAN - # Save domain meaned variable to a file per validity time. - - operator: write.write_cube_to_nc - filename: intermediate/model_level_level_domain_mean - -# Collation steps. -# Reads in intermediate cube and plots it. -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - -# plot the vertical line series + # Plot the vertical line series. - operator: plot.plot_vertical_line_series series_coordinate: model_level_number sequence_coordinate: time diff --git a/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml b/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml index f6d09b631..78af26ebe 100644 --- a/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml +++ b/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml @@ -1,5 +1,5 @@ category: Model Level Spatial -title: $VARNAME $MLEVEL Level Spatial Plot +title: $MODEL_NAME $VARNAME $MLEVEL Level Spatial Plot description: | Extracts and plots the $VARNAME from a file at model level $MLEVEL. 
@@ -10,24 +10,14 @@ parallel: variable_constraint: operator: constraints.generate_var_constraint varname: $VARNAME - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME model_level_constraint: operator: constraints.generate_level_constraint coordinate: "model_level_number" levels: $MLEVEL - - operator: write.write_cube_to_nc - filename: intermediate/model_level_field - -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - - operator: plot.spatial_contour_plot sequence_coordinate: time - stamp_coordinate: "realization" + stamp_coordinate: realization - operator: write.write_cube_to_nc overwrite: True diff --git a/src/CSET/recipes/generic_domain_mean_vertical_profile_series.yaml b/src/CSET/recipes/generic_plevel_domain_mean_vertical_profile_series.yaml similarity index 100% rename from src/CSET/recipes/generic_domain_mean_vertical_profile_series.yaml rename to src/CSET/recipes/generic_plevel_domain_mean_vertical_profile_series.yaml diff --git a/src/CSET/recipes/generic_histogram_series.yaml b/src/CSET/recipes/generic_plevel_histogram_series.yaml similarity index 69% rename from src/CSET/recipes/generic_histogram_series.yaml rename to src/CSET/recipes/generic_plevel_histogram_series.yaml index c30d13257..bca484607 100644 --- a/src/CSET/recipes/generic_histogram_series.yaml +++ b/src/CSET/recipes/generic_plevel_histogram_series.yaml @@ -1,42 +1,31 @@ category: Histogram -title: $VARNAME $MLEVEL Level Histogram Plot +title: $MODEL_NAME $VARNAME $PLEVEL Level Histogram Plot description: | - Extracts and plots the probability density of $MLEVEL level $VARNAME from a - file at model level $MLEVEL. It uses + Extracts and plots the probability density of $PLEVEL level $VARNAME from a + file at pressure level $PLEVEL of $MODEL_NAME. It uses [`plt.hist`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html) to plot the probability density so that the area under the histogram integrates to 1. `stacked` is set to True so the sum of the histograms is normalized to 1. In case of ensemble data choose from postage stamp plot or single plot via the single_plot option in the recipe directly. 
-parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints variable_constraint: operator: constraints.generate_var_constraint varname: $VARNAME - validity_time_constraint: - operator: constraints.generate_time_constraint - time_start: $VALIDITY_TIME model_level_constraint: operator: constraints.generate_level_constraint coordinate: "model_level_number" - levels: $MLEVEL - - - operator: write.write_cube_to_nc - filename: intermediate/histogram - - -collate: - - operator: read.read_cube - filename_pattern: intermediate/*.nc - - - operator: write.write_cube_to_nc - overwrite: True + levels: $PLEVEL - operator: plot.plot_histogram_series sequence_coordinate: time # stamp_coordinate and single_plot optional and only required for ensemble data stamp_coordinate: "realization" single_plot: False + + - operator: write.write_cube_to_nc + overwrite: True From 928390c45aa20ab311c8981afe322effa3b8309e Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 16:04:17 +0100 Subject: [PATCH 23/90] Adjust metadata sort and remove unhelpful help --- cset-workflow/meta/diagnostics/rose-meta.conf | 104 ++++++++---------- 1 file changed, 47 insertions(+), 57 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index d81f7e3e0..959bf0ffa 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -19,31 +19,28 @@ compulsory=true element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted length=: -sort-key=surface1 +sort-key=0surface1 [template variables=PLOT_SPATIAL_SURFACE_MODEL_FIELD] ns=Diagnostics/Quicklook description=Create plots for the specified surface fields. -help=See includes/plot_spatial_surface_model_field.cylc type=python_boolean compulsory=true -sort-key=surface2 +sort-key=0surface2 [template variables=DOMAIN_MEAN_SURFACE_TIME_SERIES] ns=Diagnostics/Quicklook description=Create time series plot of surface field domain mean. -help=See includes/deterministic_domain_mean_surface_time_series.cylc type=python_boolean compulsory=true -sort-key=surface3 +sort-key=0surface3 [template variables=DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD] ns=Diagnostics/Quicklook description=Create a series of histogram plots for selected surface fields for each cycle time. -help=See includes/deterministic_domain_surface_histogram_series.cylc. This diagnostic requires the user to also enable and enter the SURFACE_MODEL_FIELDS. The series_coordinate in the recipe is set to "time", but can be switched to any other coordinate. type=python_boolean compulsory=true -sort-key=surface4 +sort-key=0surface4 # Pressure level fields. [template variables=PRESSURE_LEVEL_MODEL_FIELDS] ns=Diagnostics/Quicklook title=Pressure level model fields description=Per model field names. help=Variable names for model level variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, or field name, or the STASH code for each model. Blank entries indicate that a model does not have that phenomenon, so it will be skipped. Ignore the boxes for models that are not enabled. compulsory=true element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted length=: -sort-key=pressure1 +sort-key=1pressure1 [template variables=PRESSURE_LEVELS] ns=Diagnostics/Quicklook description=List of pressure levels to generate plots for. help=A list of integer pressure levels in hPa in python list format. 
E.g: [850, 1000] type=python_list compulsory=true -sort-key=pressure2 +sort-key=1pressure2 [template variables=PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD] ns=Diagnostics/Quicklook description=Create plots for the specified pressure level fields. -help=See includes/generic_spatial_plevel_model_field.cylc type=python_boolean compulsory=true -sort-key=pressure3 +sort-key=1pressure3 [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_SERIES] ns=Diagnostics/Quicklook description=Domain averaged vertical profile of pressure levels for each time. -help=See includes/deterministic_domain_mean_vertical_profile_series.cylc type=python_boolean compulsory=true -sort-key=pressure3 +sort-key=1pressure3 [template variables=DOMAIN_HISTOGRAM_SERIES] ns=Diagnostics description=Create a series of histogram plots for selected pressure levels for each cycle time. -help=See includes/deterministic_domain_histogram_series.cylc trigger=template variables=HISTOGRAM_TYPE: True; type=python_boolean compulsory=true -sort-key=pressure4a +sort-key=1pressure4a [template variables=HISTOGRAM_TYPE] ns=Diagnostics @@ -101,50 +95,46 @@ description=Type of histogram to plot. "step", "barstacked" or "stepfilled" help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" values="step","barstacked","stepfilled" compulsory=true -sort-key=pressure4b +sort-key=1pressure4b # Model-level fields. -# Temporerorly disabled while we figure out LFRic equivalent. - -# [template variables=MODEL_LEVEL_MODEL_FIELDS] -# ns=Diagnostics/Quicklook -# title=Model level model fields -# description=Per model field names. -# help=Variable names for model level variables. The names across a row should -# match the same physical phenomenon, and use the appropriate standard, long, -# or field name, or the STASH code for each model. Blank entries indicate that -# a model does not have that phenomenon, so it will be skipped. - -# Ignore the boxes for models that are not enabled. -# compulsory=true -# element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 -# type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted -# length=: -# sort-key=modellevel1 - -# [template variables=UM_MODEL_LEVELS] -# ns=Diagnostics -# description=List of UM model levels to generate plots for. -# help=Include an integer list of model levels in python list format, e.g: [1,2,3,4] -# type=python_list -# compulsory=true -# sort-key=modellevel2 - -# [template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD] -# ns=Diagnostics -# description=Create plots for the specified model level fields. -# help=See includes/generic_spatial_mlevel_model_field.cylc -# type=python_boolean -# compulsory=true -# sort-key=modellevel3 - -# [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES] -# ns=Diagnostics -# description=Domain averaged vertical profile for each validity time. Requires fields UM_MODEL_LEVELS and MODEL_LEVEL_MODEL_FIELDS as input. -# help=See includes/deterministic_domain_mean_vertical_profile_modellevel_series.cylc -# type=python_boolean -# compulsory=true -# sort-key=modellevel4 +[template variables=MODEL_LEVEL_MODEL_FIELDS] +ns=Diagnostics/Quicklook +title=Model level model fields +description=Per model field names. +help=Variable names for model level variables. The names across a row should + match the same physical phenomenon, and use the appropriate standard, long, + or field name, or the STASH code for each model. 
Blank entries indicate that + a model does not have that phenomenon, so it will be skipped. + + Ignore the boxes for models that are not enabled. +compulsory=true +element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 +type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted +length=: +sort-key=2modellevel1 + +[template variables=UM_MODEL_LEVELS] +ns=Diagnostics +description=List of UM model levels to generate plots for. +help=Include an integer list of model levels in python list format, e.g: [1, 2, 3, 4] +type=python_list +compulsory=true +sort-key=2modellevel2 + +[template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD] +ns=Diagnostics +description=Create plots for the specified model level fields. +type=python_boolean +compulsory=true +sort-key=2modellevel3 + +[template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES] +ns=Diagnostics +description=Domain averaged vertical profile for each validity time. Requires fields UM_MODEL_LEVELS and MODEL_LEVEL_MODEL_FIELDS as input. +type=python_boolean +compulsory=true +sort-key=2modellevel4 # METplus. From adea843a1af8917f01e77c3d9dd508d07f4bd300 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 16:08:35 +0100 Subject: [PATCH 24/90] Move model level plots under quicklook --- cset-workflow/meta/diagnostics/rose-meta.conf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 959bf0ffa..cb2f69fa5 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -82,7 +82,7 @@ compulsory=true sort-key=1pressure3 [template variables=DOMAIN_HISTOGRAM_SERIES] -ns=Diagnostics +ns=Diagnostics/Quicklook description=Create a series of histogram plots for selected pressure levels for each cycle time. trigger=template variables=HISTOGRAM_TYPE: True; type=python_boolean @@ -90,7 +90,7 @@ compulsory=true sort-key=1pressure4a [template variables=HISTOGRAM_TYPE] -ns=Diagnostics +ns=Diagnostics/Quicklook description=Type of histogram to plot. "step", "barstacked" or "stepfilled" help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" values="step","barstacked","stepfilled" @@ -115,7 +115,7 @@ length=: sort-key=2modellevel1 [template variables=UM_MODEL_LEVELS] -ns=Diagnostics +ns=Diagnostics/Quicklook description=List of UM model levels to generate plots for. help=Include an integer list of model levels in python list format, e.g: [1, 2, 3, 4] type=python_list @@ -123,15 +123,15 @@ compulsory=true sort-key=2modellevel2 [template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD] -ns=Diagnostics +ns=Diagnostics/Quicklook description=Create plots for the specified model level fields. type=python_boolean compulsory=true sort-key=2modellevel3 [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES] -ns=Diagnostics -description=Domain averaged vertical profile for each validity time. Requires fields UM_MODEL_LEVELS and MODEL_LEVEL_MODEL_FIELDS as input. +ns=Diagnostics/Quicklook +description=Domain averaged vertical profile for each validity time. 
type=python_boolean compulsory=true sort-key=2modellevel4 From adea843a1af8917f01e77c3d9dd508d07f4bd300 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 16:08:35 +0100 Subject: [PATCH 24/90] Move model level plots under quicklook --- cset-workflow/meta/diagnostics/rose-meta.conf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 959bf0ffa..cb2f69fa5 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -82,7 +82,7 @@ compulsory=true sort-key=1pressure3 [template variables=DOMAIN_HISTOGRAM_SERIES] -ns=Diagnostics +ns=Diagnostics/Quicklook description=Create a series of histogram plots for selected pressure levels for each cycle time. trigger=template variables=HISTOGRAM_TYPE: True; type=python_boolean compulsory=true sort-key=1pressure4a @@ -90,7 +90,7 @@ compulsory=true sort-key=1pressure4a [template variables=HISTOGRAM_TYPE] -ns=Diagnostics +ns=Diagnostics/Quicklook description=Type of histogram to plot. "step", "barstacked" or "stepfilled" help=The type of histogram to plot. Options are "step" for a line histogram or "barstacked", "stepfilled" values="step","barstacked","stepfilled" compulsory=true @@ -115,7 +115,7 @@ length=: sort-key=2modellevel1 [template variables=UM_MODEL_LEVELS] -ns=Diagnostics +ns=Diagnostics/Quicklook description=List of UM model levels to generate plots for. help=Include an integer list of model levels in python list format, e.g: [1, 2, 3, 4] type=python_list compulsory=true sort-key=2modellevel2 [template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD] -ns=Diagnostics +ns=Diagnostics/Quicklook description=Create plots for the specified model level fields. type=python_boolean compulsory=true sort-key=2modellevel3 [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES] -ns=Diagnostics -description=Domain averaged vertical profile for each validity time. Requires fields UM_MODEL_LEVELS and MODEL_LEVEL_MODEL_FIELDS as input. +ns=Diagnostics/Quicklook +description=Domain averaged vertical profile for each validity time. 
type=python_boolean compulsory=true sort-key=2modellevel4 From 6f440c33359ba7e5f37654554414092cc579c47a Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 16:32:20 +0100 Subject: [PATCH 25/90] Cast to string before sanitising cylc task names --- cset-workflow/lib/python/jinja_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py index b00caa200..5c840a732 100644 --- a/cset-workflow/lib/python/jinja_utils.py +++ b/cset-workflow/lib/python/jinja_utils.py @@ -100,6 +100,9 @@ def sanitise_task_name(s: str): Note that actually there are a few more characters supported, see: https://github.com/cylc/cylc-flow/issues/6288 """ + # Ensure we have a string. + if not isinstance(s, str): + s = str(s) # Ensure the first character is alphanumeric. if not s[0].isalnum(): s = f"sanitised_{s}" From 1e4c2354268e00f7220c6206ed720f3919cb5a11 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 16:53:16 +0100 Subject: [PATCH 26/90] Rename parallel to steps in recipes --- src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml | 2 +- src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml | 2 +- src/CSET/recipes/generic_surface_histogram_series.yaml | 2 +- src/CSET/recipes/generic_surface_single_point_time_series.yaml | 3 +-- src/CSET/recipes/inflow_layer_properties_plot.yaml | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml b/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml index 78af26ebe..cbb15dadd 100644 --- a/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml +++ b/src/CSET/recipes/generic_mlevel_spatial_plot_sequence.yaml @@ -3,7 +3,7 @@ title: $MODEL_NAME $VARNAME $MLEVEL Level Spatial Plot description: | Extracts and plots the $VARNAME from a file at model level $MLEVEL. -parallel: +steps: - operator: read.read_cubes constraint: operator: constraints.combine_constraints diff --git a/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml b/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml index 08a835088..86a2da54f 100644 --- a/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml +++ b/src/CSET/recipes/generic_plevel_spatial_plot_sequence.yaml @@ -3,7 +3,7 @@ title: $MODEL_NAME $VARNAME $PLEVEL Level Spatial Plot description: | Extracts and plots the $PLEVELNAME from $MODEL_NAME at pressure level $PLEVEL. -parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints diff --git a/src/CSET/recipes/generic_surface_histogram_series.yaml b/src/CSET/recipes/generic_surface_histogram_series.yaml index 2425c2e34..ed34358a0 100644 --- a/src/CSET/recipes/generic_surface_histogram_series.yaml +++ b/src/CSET/recipes/generic_surface_histogram_series.yaml @@ -7,7 +7,7 @@ description: | integrates to 1. stacked is set to True so the sum of the histograms is normalized to 1. 
-parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints diff --git a/src/CSET/recipes/generic_surface_single_point_time_series.yaml b/src/CSET/recipes/generic_surface_single_point_time_series.yaml index a7ac998f9..bb9300fc4 100644 --- a/src/CSET/recipes/generic_surface_single_point_time_series.yaml +++ b/src/CSET/recipes/generic_surface_single_point_time_series.yaml @@ -2,8 +2,7 @@ category: Time Series title: $MODEL_NAME Time series of $VARNAME at $LATITUDE_POINT N, $LONGITUDE_POINT E description: Plots a time series of the surface $VARNAME at a selected gridpoint. -# Parallel steps. -parallel: +steps: - operator: read.read_cube constraint: operator: constraints.combine_constraints diff --git a/src/CSET/recipes/inflow_layer_properties_plot.yaml b/src/CSET/recipes/inflow_layer_properties_plot.yaml index a58e03818..ef08ded5c 100644 --- a/src/CSET/recipes/inflow_layer_properties_plot.yaml +++ b/src/CSET/recipes/inflow_layer_properties_plot.yaml @@ -3,7 +3,7 @@ title: $MODEL_NAME Inflow layer properties plot description: | Extracts data required for, and calculates the Inflow properties diagnostic, plotting on a spatial map. -parallel: +steps: - operator: read.read_cubes - operator: convection.inflow_layer_properties From 1cc58664af9b92ed9d1f918a6d3a4677bcc21003 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 22 Aug 2024 17:08:21 +0100 Subject: [PATCH 27/90] Correct level coordinate read in recipe --- src/CSET/operators/read.py | 1 + src/CSET/recipes/generic_plevel_histogram_series.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CSET/operators/read.py b/src/CSET/operators/read.py index 41b7949c2..c75338037 100644 --- a/src/CSET/operators/read.py +++ b/src/CSET/operators/read.py @@ -304,6 +304,7 @@ def _check_input_files(input_path: Path | str, filename_pattern: str) -> Iterabl FileNotFoundError: If the provided arguments don't resolve to at least one existing file. """ + logging.debug("Checking '%s' for pattern '%s'", input_path, filename_pattern) # Convert string paths into Path objects. if isinstance(input_path, str): input_path = Path(input_path) diff --git a/src/CSET/recipes/generic_plevel_histogram_series.yaml b/src/CSET/recipes/generic_plevel_histogram_series.yaml index bca484607..13133953a 100644 --- a/src/CSET/recipes/generic_plevel_histogram_series.yaml +++ b/src/CSET/recipes/generic_plevel_histogram_series.yaml @@ -18,7 +18,7 @@ steps: varname: $VARNAME model_level_constraint: operator: constraints.generate_level_constraint - coordinate: "model_level_number" + coordinate: pressure levels: $PLEVEL - operator: plot.plot_histogram_series From cc7d3938de567a8c95ab559fb4707be30187ddcd Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 09:46:31 +0100 Subject: [PATCH 28/90] Return CubeList from scatter plot operator --- src/CSET/operators/plot.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py index 74baac78a..64ae0eca7 100644 --- a/src/CSET/operators/plot.py +++ b/src/CSET/operators/plot.py @@ -1071,7 +1071,7 @@ def scatter_plot( filename: str = None, one_to_one: bool = True, **kwargs, -) -> tuple[iris.cube.Cube, iris.cube.Cube]: +) -> iris.cube.CubeList[iris.cube.Cube, iris.cube.Cube]: """Plot a scatter plot between two variables. Both cubes must be 1D. @@ -1085,15 +1085,12 @@ def scatter_plot( filename: str, optional Filename of the plot to write. 
one_to_one: bool, optional - If True a 1:1 line is plotted; if False it is not. - Default is True. + If True a 1:1 line is plotted; if False it is not. Default is True. Returns ------- - cube_x - The original x cube (so further operations can be applied). - cube_y - The original y cube (so further operations can be applied). + cubes: CubeList + CubeList of the original x and y cubes for further processing. Raises ------ @@ -1106,18 +1103,17 @@ def scatter_plot( Notes ----- Scatter plots are used for determining if there is a relationship between - two variables. Positive relations have a slope going from bottom left to - top right; Negative relations have a slope going from top left to bottom - right. + two variables. Positive relations have a slope going from bottom left to top + right; Negative relations have a slope going from top left to bottom right. A variant of the scatter plot is the quantile-quantile plot. This plot does not use all data points, but the selected quantiles of each variable instead. Quantile-quantile plots are valuable for comparing against observations and other models. Identical percentiles between the variables will lie on the one-to-one line implying the values correspond well to each - other. Where there is a deviation from the one-to-one line a range - of possibilities exist depending on how and where the data is shifted - (e.g., Wilks 2011 [Wilks2011]_). + other. Where there is a deviation from the one-to-one line a range of + possibilities exist depending on how and where the data is shifted (e.g., + Wilks 2011 [Wilks2011]_). For distributions above the one-to-one line the distribution is left-skewed; below is right-skewed. A distinct break implies a bimodal distribution, and @@ -1155,7 +1151,7 @@ def scatter_plot( # Make a page to display the plots. _make_plot_html_page(plot_index) - return cube_x, cube_y + return iris.cube.CubeList([cube_x, cube_y]) def plot_histogram_series( From 4d46c7d6129c62d1dea61f1ff7ad0cf102f5007f Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 09:47:27 +0100 Subject: [PATCH 29/90] Convert transect recipe Q-Q plot recipe is partially converted, pending Issue #808. 
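The converted include files above all lean on two Jinja2 helpers from cset-workflow/lib/python/jinja_utils.py, sanitise_task_name and restructure_field_list, whose full definitions are not shown in this series. As a rough, hypothetical sketch only (the real signature and edge-case handling may well differ), restructure_field_list is assumed to turn the per-model field table from rose edit into one mapping per field, keyed by model number, dropping blank entries so that models lacking a given field are skipped:

    # Hypothetical sketch only: the real implementation lives in
    # cset-workflow/lib/python/jinja_utils.py and may differ in signature
    # and edge-case handling.
    def restructure_field_list(field_rows):
        """Turn rows of per-model field names into per-field mappings.

        >>> restructure_field_list([["temp_m1", "", "temp_m3"]])
        [{1: 'temp_m1', 3: 'temp_m3'}]
        """
        return [
            {number: name for number, name in enumerate(row, start=1) if name}
            for row in field_rows
        ]

Under that assumption, each template loop yields one task per (model, field) pair, which is how the m{{model_number}} prefixes in the task names above are produced.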
--- cset-workflow/includes/basic_qq_plot.cylc | 17 + cset-workflow/includes/mlevel_transect.cylc | 21 ++ cset-workflow/includes/plevel_transect.cylc | 21 ++ cset-workflow/includes/transect.cylc | 18 - cset-workflow/meta/diagnostics/rose-meta.conf | 324 ++++++++++-------- src/CSET/recipes/generic_basic_qq_plot.yaml | 40 +-- src/CSET/recipes/transect.yaml | 37 +- 7 files changed, 249 insertions(+), 229 deletions(-) create mode 100644 cset-workflow/includes/mlevel_transect.cylc create mode 100644 cset-workflow/includes/plevel_transect.cylc delete mode 100644 cset-workflow/includes/transect.cylc diff --git a/cset-workflow/includes/basic_qq_plot.cylc b/cset-workflow/includes/basic_qq_plot.cylc index ae792ff8e..15d9d4f21 100644 --- a/cset-workflow/includes/basic_qq_plot.cylc +++ b/cset-workflow/includes/basic_qq_plot.cylc @@ -31,3 +31,20 @@ """ {% endfor %} {% endif %} + +{# Example code to use as basis for multi-model cycling: +{% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} +[runtime] + [[generic_basic_qq_plot_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS + [[[environment]]] + CSET_RECIPE_NAME = "generic_basic_qq_plot.yaml" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} +{% endfor %} +#} diff --git a/cset-workflow/includes/mlevel_transect.cylc b/cset-workflow/includes/mlevel_transect.cylc new file mode 100644 index 000000000..710074830 --- /dev/null +++ b/cset-workflow/includes/mlevel_transect.cylc @@ -0,0 +1,21 @@ +{% if EXTRACT_MLEVEL_TRANSECT %} +{% for equivalent_field in restructure_field_list(MODEL_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} +[runtime] + [[mlevel_transect_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS + # Transect calculation can take a while, so increase the time limit. + execution time limit = PT60M + [[[environment]]] + CSET_RECIPE_NAME = "transect.yaml" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --VERTICAL_COORDINATE='model_level_number' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + --START_COORDS='{{MLEVEL_TRANSECT_STARTCOORDS}}' + --FINISH_COORDS='{{MLEVEL_TRANSECT_FINISHCOORDS}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} +{% endfor %} +{% endif %} diff --git a/cset-workflow/includes/plevel_transect.cylc b/cset-workflow/includes/plevel_transect.cylc new file mode 100644 index 000000000..18b99f71f --- /dev/null +++ b/cset-workflow/includes/plevel_transect.cylc @@ -0,0 +1,21 @@ +{% if EXTRACT_PLEVEL_TRANSECT %} +{% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} +{% for model_number, field in equivalent_field.items() %} +[runtime] + [[plevel_transect_m{{model_number}}_{{sanitise_task_name(field)}}]] + inherit = PROCESS + # Transect calculation can take a while, so increase the time limit. 
+ execution time limit = PT60M + [[[environment]]] + CSET_RECIPE_NAME = "transect.yaml" + CSET_ADDOPTS = """ + --VARNAME='{{field}}' + --VERTICAL_COORDINATE='pressure' + --MODEL_NAME='{{models[model_number-1]["name"]}}' + --START_COORDS='{{PLEVEL_TRANSECT_STARTCOORDS}}' + --FINISH_COORDS='{{PLEVEL_TRANSECT_FINISHCOORDS}}' + """ + MODEL_NUMBER = {{model_number}} +{% endfor %} +{% endfor %} +{% endif %} diff --git a/cset-workflow/includes/transect.cylc b/cset-workflow/includes/transect.cylc deleted file mode 100644 index 69ab854fa..000000000 --- a/cset-workflow/includes/transect.cylc +++ /dev/null @@ -1,18 +0,0 @@ -{% if EXTRACT_TRANSECT %} -{% for var in CS_VARS %} -[runtime] - [[parallel_transect_{{var}}]] - inherit = PARALLEL - execution time limit = PT60M - [[[environment]]] - CSET_RECIPE_NAME = "transect.yaml" - CSET_ADDOPTS = "--CS_STARTCOORDS='{{CS_STARTCOORDS}}' --CS_FINISHCOORDS='{{CS_FINISHCOORDS}}' --CS_VAR='{{var}}' --CS_VERTLEV='{{CS_VERTLEV}}'" - - [[collate_transect_{{var}}]] - inherit = COLLATE - execution time limit = PT60M - [[[environment]]] - CSET_RECIPE_NAME = "transect.yaml" - CSET_ADDOPTS = "--CS_STARTCOORDS='{{CS_STARTCOORDS}}' --CS_FINISHCOORDS='{{CS_FINISHCOORDS}}' --CS_VAR='{{var}}' --CS_VERTLEV='{{CS_VERTLEV}}'" -{% endfor %} -{% endif %} diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index cb2f69fa5..e3d672d1a 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -42,6 +42,42 @@ type=python_boolean compulsory=true sort-key=0surface4 +[template variables=SURFACE_SINGLE_POINT_TIME_SERIES] +ns=Diagnostics/Quicklook +description=Plot a time series at a single specified location in a surface field. +help=Include values of desired longitude and latitude. +type=python_boolean +compulsory=true +trigger=template variables=LATITUDE_POINT: True; + template variables=LONGITUDE_POINT: True; + template variables=SINGLE_POINT_METHOD: True; +sort-key=0surface5 + +[template variables=LATITUDE_POINT] +ns=Diagnostics/Quicklook +description=Latitude of selected point. Note that this could be rotated or not, depending on the data provided. +help=The latitude must exist within the domain. Value should be a float: for example, -1.5. +type=real +compulsory=true +sort-key=0surface6 + +[template variables=LONGITUDE_POINT] +ns=Diagnostics/Quicklook +description=Longitude of selected point. Note that this could be rotated or not, depending on the data provided. +help=The longitude must exist within the domain. Value should be a float: for example, 0.8. +type=real +compulsory=true +sort-key=0surface6 + +[template variables=SINGLE_POINT_METHOD] +ns=Diagnostics/Quicklook +description=Method used to map model data onto selected gridpoints. +help=Method used to map model data onto selected gridpoints. These are regrid methods available in Iris. +values="Nearest", "Linear" +compulsory=true +sort-key=0surface6 + + # Pressure level fields. [template variables=PRESSURE_LEVEL_MODEL_FIELDS] ns=Diagnostics/Quicklook ... @@ -86,6 +122,44 @@ ... [template variables=EXTRACT_PLEVEL_TRANSECT] +ns=Diagnostics/Quicklook +description=Plot a cross section on pressure levels for the chosen variables. +help=This functionality extracts data for each variable and takes a column + along each point of the transect. 
The number of points along the transect + is determined by calculating the length of the transect in degrees, and + dividing this by the minimum grid spacing within the domain. Additional + dimensions (time, ensemble) are accepted and returned as a cross section. + This allows the user to compute slices through features of meteorological + interest, such as transects through valleys, or through a front as it + passes through. +type=python_boolean +trigger=template variables=PLEVEL_TRANSECT_STARTCOORDS: True; + template variables=PLEVEL_TRANSECT_FINISHCOORDS: True; +compulsory=true +sort-key=1pressure5 + +[template variables=PLEVEL_TRANSECT_STARTCOORDS] +ns=Diagnostics/Quicklook +description=Start latitude, longitude of the cross section. +help=The latitude, longitude coordinate with respect to the model grid where the + cross section will start i.e. the furthest left hand point of the plot + (where the x axis is distance along the transect, and the y axis is pressure level). +type=real,real +compulsory=true +sort-key=1pressure6 + +[template variables=PLEVEL_TRANSECT_FINISHCOORDS] +ns=Diagnostics/Quicklook +description=Finish latitude, longitude of the cross section. +help=The latitude, longitude coordinate with respect to the model grid where the + cross section will finish i.e. the furthest right hand point of the plot + (where the x axis is distance along the transect, and the y axis is pressure level). +type=real,real +compulsory=true +sort-key=1pressure6 + + # Model-level fields. [template variables=MODEL_LEVEL_MODEL_FIELDS] ns=Diagnostics/Quicklook title=Model level model fields description=Per model field names. help=Variable names for model level variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, or field name, or the STASH code for each model. Blank entries indicate that a model does not have that phenomenon, so it will be skipped. Ignore the boxes for models that are not enabled. compulsory=true element-titles=Model 01,Model 02,Model 03,Model 04,Model 05,Model 06,Model 07,Model 08,Model 09,Model 10 type=quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted,quoted length=: sort-key=2modellevel1 [template variables=UM_MODEL_LEVELS] ns=Diagnostics/Quicklook description=List of UM model levels to generate plots for. help=Include an integer list of model levels in python list format, e.g: [1, 2, 3, 4] type=python_list compulsory=true sort-key=2modellevel2 [template variables=PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD] ns=Diagnostics/Quicklook description=Create plots for the specified model level fields. type=python_boolean compulsory=true sort-key=2modellevel3 [template variables=DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES] ns=Diagnostics/Quicklook description=Domain averaged vertical profile for each validity time. type=python_boolean compulsory=true sort-key=2modellevel4 [template variables=EXTRACT_ Date: Fri, 23 Aug 2024 10:32:38 +0100 Subject: [PATCH 30/90] Fix CLI tests --- src/CSET/graph.py | 3 +- tests/test_cli.py | 29 ------------------- tests/test_data/addopts_test_recipe.yaml | 5 +--- tests/test_data/ensemble_air_temp.yaml | 2 +- tests/test_data/noop_recipe.yaml | 5 +--- tests/test_data/plot_instant_air_temp.yaml | 2 +- .../plot_instant_air_temp_collapse.yaml | 2 +- 7 files changed, 6 insertions(+), 42 deletions(-) diff --git a/src/CSET/graph.py b/src/CSET/graph.py index cb6890317..7404d3ae6 100644 --- a/src/CSET/graph.py +++ b/src/CSET/graph.py @@ -85,8 +85,7 @@ def step_parser(step: dict, prev_node: str) -> str: prev_node = "START" graph.add_node(prev_node) try: - # TODO: Expand to cover collate too. 
- for step in recipe["parallel"]: + for step in recipe["steps"]: prev_node = step_parser(step, prev_node) except KeyError as err: raise ValueError("Invalid recipe") from err diff --git a/tests/test_cli.py b/tests/test_cli.py index 82bad681f..2834fb4c0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -52,35 +52,6 @@ def test_bake_recipe_execution(tmp_path): ) -def test_bake_parallel_only(tmp_path): - """Run recipe parallel steps from the command line.""" - subprocess.run( - [ - "cset", - "bake", - f"--input-dir={os.devnull}", - f"--output-dir={tmp_path}", - "--recipe=tests/test_data/noop_recipe.yaml", - "--parallel-only", - ], - check=True, - ) - - -def test_bake_post_only(tmp_path): - """Run recipe collate steps from the command line.""" - subprocess.run( - [ - "cset", - "bake", - f"--output-dir={tmp_path}", - "--recipe=tests/test_data/noop_recipe.yaml", - "--collate-only", - ], - check=True, - ) - - def test_bake_invalid_args(): """Invalid arguments give non-zero exit code.""" with pytest.raises(subprocess.CalledProcessError): diff --git a/tests/test_data/addopts_test_recipe.yaml b/tests/test_data/addopts_test_recipe.yaml index bfd681b23..227f819b0 100644 --- a/tests/test_data/addopts_test_recipe.yaml +++ b/tests/test_data/addopts_test_recipe.yaml @@ -1,9 +1,6 @@ title: "List: $LIST" description: A recipe that has a list templated. Only used for testing. -parallel: +steps: - operator: misc.noop argument: $LIST - -collate: - - operator: misc.noop diff --git a/tests/test_data/ensemble_air_temp.yaml b/tests/test_data/ensemble_air_temp.yaml index b7fb90dd9..aa3e62e80 100644 --- a/tests/test_data/ensemble_air_temp.yaml +++ b/tests/test_data/ensemble_air_temp.yaml @@ -1,4 +1,4 @@ -parallel: +steps: - operator: read.read_cubes filename_pattern: "exeter_em*.nc" constraint: diff --git a/tests/test_data/noop_recipe.yaml b/tests/test_data/noop_recipe.yaml index 4d359534d..9cf2ad4af 100644 --- a/tests/test_data/noop_recipe.yaml +++ b/tests/test_data/noop_recipe.yaml @@ -1,12 +1,9 @@ title: Noop description: A recipe that does nothing. Only used for testing. -parallel: +steps: - operator: misc.noop test_argument: Banana dict_argument: {"key": "value"} substep: operator: constraints.combine_constraints - -collate: - - operator: misc.noop diff --git a/tests/test_data/plot_instant_air_temp.yaml b/tests/test_data/plot_instant_air_temp.yaml index 6f2db6383..0328510d0 100644 --- a/tests/test_data/plot_instant_air_temp.yaml +++ b/tests/test_data/plot_instant_air_temp.yaml @@ -3,7 +3,7 @@ description: | Extracts out the instantaneous 1.5m air temperature from a file and writes it to a new one. -parallel: +steps: - operator: read.read_cubes constraint: operator: constraints.generate_stash_constraint diff --git a/tests/test_data/plot_instant_air_temp_collapse.yaml b/tests/test_data/plot_instant_air_temp_collapse.yaml index ffc18dc1c..3f970db22 100644 --- a/tests/test_data/plot_instant_air_temp_collapse.yaml +++ b/tests/test_data/plot_instant_air_temp_collapse.yaml @@ -2,7 +2,7 @@ title: Plot average air temperature description: | Plots the mean 1.5m air temperature over an area. 
-parallel: +steps: - operator: read.read_cubes constraint: operator: constraints.generate_stash_constraint From 41443f0e9bda0a0cc33e40c1db3da388676b8153 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 10:54:37 +0100 Subject: [PATCH 31/90] Fix common and operator runner tests --- src/CSET/operators/__init__.py | 3 +-- tests/test_common.py | 7 +++---- tests/test_run_recipes.py | 33 ++++++++------------------------- 3 files changed, 12 insertions(+), 31 deletions(-) diff --git a/src/CSET/operators/__init__.py b/src/CSET/operators/__init__.py index a1c9cc292..8aff3e716 100644 --- a/src/CSET/operators/__init__.py +++ b/src/CSET/operators/__init__.py @@ -105,8 +105,7 @@ def _write_metadata(recipe: dict): # TODO: Investigate whether we might be better served by an SQLite database. metadata = recipe.copy() # Remove steps, as not needed, and might contain non-serialisable types. - metadata.pop("parallel", None) - metadata.pop("collate", None) + metadata.pop("steps", None) with open("meta.json", "wt", encoding="UTF-8") as fp: json.dump(metadata, fp) os.sync() diff --git a/tests/test_common.py b/tests/test_common.py index a639f06b1..5f55c0897 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -25,12 +25,12 @@ def test_parse_recipe_string(): """Loading and parsing of a YAML recipe from a string.""" valid_recipe = """\ - parallel: + steps: operator: misc.noop arg1: Hello """ parsed = common.parse_recipe(valid_recipe) - assert parsed == {"parallel": {"operator": "misc.noop", "arg1": "Hello"}} + assert parsed == {"steps": {"operator": "misc.noop", "arg1": "Hello"}} def test_parse_recipe_path(): @@ -39,7 +39,7 @@ def test_parse_recipe_path(): expected = { "title": "Noop", "description": "A recipe that does nothing. Only used for testing.", - "parallel": [ + "steps": [ { "operator": "misc.noop", "test_argument": "Banana", @@ -47,7 +47,6 @@ def test_parse_recipe_path(): "substep": {"operator": "constraints.combine_constraints"}, } ], - "collate": [{"operator": "misc.noop"}], } assert parsed == expected diff --git a/tests/test_run_recipes.py b/tests/test_run_recipes.py index f395e0e62..88b8a2931 100644 --- a/tests/test_run_recipes.py +++ b/tests/test_run_recipes.py @@ -16,7 +16,6 @@ import json from pathlib import Path -from uuid import uuid4 import pytest @@ -47,44 +46,28 @@ def test_get_operator_exception_not_callable(): CSET.operators.get_operator("misc.__doc__") -def test_execute_recipe_parallel(tmp_path: Path): +def test_execute_recipe(tmp_path: Path): """Execute recipe to test happy case (this is really an integration test).""" input_file = Path("tests/test_data/air_temp.nc") - output_dir = tmp_path / f"{uuid4()}" - recipe_file = Path("tests/test_data/plot_instant_air_temp.yaml") - CSET.operators.execute_recipe_parallel(recipe_file, input_file, output_dir) + recipe = Path("tests/test_data/plot_instant_air_temp.yaml") + CSET.operators.execute_recipe(recipe, input_file, tmp_path) -def test_execute_recipe_parallel_edge_cases(tmp_path: Path): +def test_execute_recipe_edge_cases(tmp_path: Path): """Test weird edge cases. 
Also tests data paths not being pathlib Paths.""" input_file = "tests/test_data/air_temp.nc" - output_dir = tmp_path / f"{uuid4()}" recipe = Path("tests/test_data/noop_recipe.yaml") - CSET.operators.execute_recipe_parallel(recipe, input_file, output_dir) + CSET.operators.execute_recipe(recipe, input_file, tmp_path) -def test_execute_recipe_parallel_invalid_output_dir(tmp_path: Path): +def test_execute_recipe_invalid_output_dir(tmp_path: Path): """Exception raised if output directory can't be created.""" - recipe = '{"parallel":[{"operator": misc.noop}]}' + recipe = '{"steps":[{"operator": misc.noop}]}' input_file = Path("tests/test_data/air_temp.nc") output_dir = tmp_path / "actually_a_file" output_dir.touch() with pytest.raises((FileExistsError, NotADirectoryError)): - CSET.operators.execute_recipe_parallel(recipe, input_file, output_dir) - - -def test_execute_recipe_collate(tmp_path: Path): - """Execute collate from a recipe.""" - output_dir = tmp_path - recipe_file = Path("tests/test_data/noop_recipe.yaml") - CSET.operators.execute_recipe_collate(recipe_file, output_dir) - - -def test_execute_recipe_collate_no_steps(tmp_path: Path): - """Execute collate for a recipe without any collate steps.""" - recipe = '{"parallel":[{"operator": misc.noop}]}' - output_dir = tmp_path - CSET.operators.execute_recipe_collate(recipe, output_dir) + CSET.operators.execute_recipe(recipe, input_file, output_dir) def test_run_steps_style_file_metadata_written(tmp_path: Path): From 553ab609b7373ec7e7371f5d4d1bab86eb380fc3 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 11:16:07 +0100 Subject: [PATCH 32/90] Fix run_cset_recipe tests --- tests/workflow_utils/test_run_cset_recipe.py | 78 +++++--------------- 1 file changed, 18 insertions(+), 60 deletions(-) diff --git a/tests/workflow_utils/test_run_cset_recipe.py b/tests/workflow_utils/test_run_cset_recipe.py index 8ce4eb224..8ae372a92 100644 --- a/tests/workflow_utils/test_run_cset_recipe.py +++ b/tests/workflow_utils/test_run_cset_recipe.py @@ -24,17 +24,11 @@ from CSET._workflow_utils import run_cset_recipe -def test_subprocess_env(monkeypatch): +def test_subprocess_env(): """Test subprocess_env function.""" - monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", "2000-01-01T00:00:00Z") - monkeypatch.setenv("CSET_ADDOPTS", "--other-opts") - expected = { - "CYLC_TASK_CYCLE_POINT": "2000-01-01T00:00:00Z", - "CSET_ADDOPTS": "--other-opts --VALIDITY_TIME=2000-01-01T00:00:00Z", - } + expected = dict(os.environ) actual = run_cset_recipe.subprocess_env() - for expected_item in expected.items(): - assert expected_item in actual.items() + assert actual == expected def test_recipe_file(monkeypatch, tmp_working_dir): @@ -50,15 +44,12 @@ def test_recipe_id(monkeypatch, tmp_working_dir): def mock_recipe_file(): with open("recipe.yaml", "wt", encoding="UTF-8") as fp: - fp.write("title: Recipe Title\nparallel: [{operator: misc.noop}]") + fp.write("title: Recipe Title\nsteps: [{operator: misc.noop}]") return "recipe.yaml" - def mock_subprocess_env(): - return os.environ - + monkeypatch.setenv("MODEL_NUMBER", "1") monkeypatch.setattr(run_cset_recipe, "recipe_file", mock_recipe_file) - monkeypatch.setattr(run_cset_recipe, "subprocess_env", mock_subprocess_env) - expected = "recipe_title" + expected = "m1_recipe_title" actual = run_cset_recipe.recipe_id() assert actual == expected @@ -71,11 +62,8 @@ def mock_recipe_file(): fp.write("Not a recipe!") return "recipe.yaml" - def mock_subprocess_env(): - return os.environ - + monkeypatch.setenv("MODEL_NUMBER", "1") 
monkeypatch.setattr(run_cset_recipe, "recipe_file", mock_recipe_file) - monkeypatch.setattr(run_cset_recipe, "subprocess_env", mock_subprocess_env) with pytest.raises(subprocess.CalledProcessError): run_cset_recipe.recipe_id() @@ -88,8 +76,9 @@ def mock_recipe_id(): monkeypatch.setattr(run_cset_recipe, "recipe_id", mock_recipe_id) monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", "/share") + monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", "20000101T0000Z") actual = run_cset_recipe.output_directory() - expected = "/share/web/plots/recipe_id" + expected = "/share/web/plots/recipe_id_20000101T0000Z" assert actual == expected @@ -97,7 +86,8 @@ def test_data_directory(monkeypatch): """Data directory correctly interpreted.""" monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", "/share") monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", "20000101T0000Z") - expected = "/share/cycle/20000101T0000Z/data" + monkeypatch.setenv("MODEL_NUMBER", "1") + expected = "/share/cycle/20000101T0000Z/data/1" actual = run_cset_recipe.data_directory() assert actual == expected @@ -117,46 +107,14 @@ def test_create_diagnostic_archive(tmp_path): assert set(archive.namelist()) == files -def test_entrypoint_parallel(monkeypatch): - """Check that parallel run_cset_recipe only runs parallel function.""" - - def assert_true(): - assert True - - def assert_false(): - assert False, "collate() during parallel job." # noqa: B011 - - monkeypatch.setenv("CSET_BAKE_MODE", "parallel") - monkeypatch.setattr(run_cset_recipe, "parallel", assert_true) - monkeypatch.setattr(run_cset_recipe, "collate", assert_false) - - run_cset_recipe.run() - - -def test_entrypoint_collate(monkeypatch): - """Check that collate run_cset_recipe only runs collate function.""" +def test_entrypoint(monkeypatch): + """Check that run_cset_recipe.run() calls the correct function.""" + function_ran = False def assert_true(): - assert True - - def assert_false(): - assert False, "parallel() during collate job." # noqa: B011 - - monkeypatch.setenv("CSET_BAKE_MODE", "collate") - monkeypatch.setattr(run_cset_recipe, "parallel", assert_false) - monkeypatch.setattr(run_cset_recipe, "collate", assert_true) - - run_cset_recipe.run() - - -def test_entrypoint_neither(monkeypatch): - """Check that other CSET_BAKE_MODE runs no functions.""" - - def assert_false(): - assert False, "unwanted processing." # noqa: B011 - - monkeypatch.setenv("CSET_BAKE_MODE", "") - monkeypatch.setattr(run_cset_recipe, "parallel", assert_false) - monkeypatch.setattr(run_cset_recipe, "collate", assert_false) + nonlocal function_ran + function_ran = True + monkeypatch.setattr(run_cset_recipe, "run_recipe_steps", assert_true) run_cset_recipe.run() + assert function_ran, "Function did not run!" From 3ec2c855ad760616190530ba9570d5dfc56d1ed4 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 11:22:47 +0100 Subject: [PATCH 33/90] Cover error case in cset graph --- tests/test_graph.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_graph.py b/tests/test_graph.py index ddce3a1bb..9eb4f7b19 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -38,10 +38,16 @@ def test_save_graph_detailed(tmp_path: Path): def test_save_graph_no_operators_exception(): - """Exception raised from recipe with no operators.""" + """Exception raised from recipe with no operators in its steps.""" with pytest.raises(ValueError): # Inline YAML form used. 
- graph.save_graph('{"parallel": [{"argument": "no_operators"}]}') + graph.save_graph('{"steps": [{"argument": "no_operators"}]}') + + +def test_save_graph_no_steps_exception(): + """Exception raised from recipe with no steps.""" + with pytest.raises(ValueError): + graph.save_graph("title: Recipe with no steps") def test_save_graph_auto_open_xdg_open(tmp_path: Path, monkeypatch): From 840ec57cf397dd88b9fdbb96c8a59aaeb3856eca Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 12:31:10 +0100 Subject: [PATCH 34/90] Add some more tests --- src/CSET/_workflow_utils/run_cset_recipe.py | 68 +++++----- src/CSET/operators/regrid.py | 121 ++++++++++-------- tests/operators/test_regrid.py | 3 +- ..._run_recipes.py => test_execute_recipe.py} | 0 tests/workflow_utils/test_run_cset_recipe.py | 32 +++++ 5 files changed, 135 insertions(+), 89 deletions(-) rename tests/{test_run_recipes.py => test_execute_recipe.py} (100%) diff --git a/src/CSET/_workflow_utils/run_cset_recipe.py b/src/CSET/_workflow_utils/run_cset_recipe.py index 51f29637d..fe97c3814 100755 --- a/src/CSET/_workflow_utils/run_cset_recipe.py +++ b/src/CSET/_workflow_utils/run_cset_recipe.py @@ -34,20 +34,23 @@ def recipe_id(): """Get the ID for the recipe.""" file = recipe_file() env = subprocess_env() - p = subprocess.run( - ("cset", "recipe-id", "--recipe", file), - capture_output=True, - env=env, - ) - # Explicitly check return code as otherwise we can't get the error message. - if p.returncode != 0: - logging.error( - "cset recipe-id returned non-zero exit code.\n%s", + try: + p = subprocess.run( + ("cset", "recipe-id", "--recipe", file), + capture_output=True, + check=True, + env=env, + ) + except subprocess.CalledProcessError as err: + logging.exception( + "cset recipe-id exited with non-zero code %s.\nstdout: %s\nstderr: %s", + err.returncode, # Presume that subprocesses have the same IO encoding as this one. # Honestly, on all our supported platforms this will be "utf-8". - p.stderr.decode(sys.stderr.encoding), + err.stdout.decode(sys.stdout.encoding), + err.stderr.decode(sys.stderr.encoding), ) - p.check_returncode() + raise id = p.stdout.decode(sys.stdout.encoding).strip() model_number = os.environ["MODEL_NUMBER"] return f"m{model_number}_{id}" @@ -82,32 +85,27 @@ def create_diagnostic_archive(output_directory): def run_recipe_steps(): - """Collate processed data together and produce output plot. - - If the intermediate directory doesn't exist then we are running a simple - non-parallelised recipe, and we need to run cset bake to process the data - and produce any plots. So we actually get some usage out of it, we are using - the non-restricted form of bake, so it runs both the processing and - collation steps. 
- """ + """Process data and produce output plots.""" try: - logging.info("Collating intermediate data and saving output.") - subprocess.run( - ( - "cset", - "-v", - "bake", - f"--recipe={recipe_file()}", - f"--input-dir={data_directory()}", - f"--output-dir={output_directory()}", - f"--style-file={os.getenv('COLORBAR_FILE', '')}", - f"--plot-resolution={os.getenv('PLOT_RESOLUTION', '')}", - ), - check=True, - env=subprocess_env(), + command = ( + "cset", + "-v", + "bake", + f"--recipe={recipe_file()}", + f"--input-dir={data_directory()}", + f"--output-dir={output_directory()}", + f"--style-file={os.getenv('COLORBAR_FILE', '')}", + f"--plot-resolution={os.getenv('PLOT_RESOLUTION', '')}", + ) + logging.info("Running %s", " ".join(command)) + subprocess.run(command, check=True, env=subprocess_env(), capture_output=True) + except subprocess.CalledProcessError as err: + logging.exception( + "cset bake exited with non-zero code %s.\nstdout: %s\nstderr: %s", + err.returncode, + err.stdout.decode(sys.stdout.encoding), + err.stderr.decode(sys.stderr.encoding), ) - except subprocess.CalledProcessError: - logging.error("cset bake exited non-zero while collating.") raise create_diagnostic_archive(output_directory()) diff --git a/src/CSET/operators/regrid.py b/src/CSET/operators/regrid.py index a141b63f6..cd5b66f6d 100644 --- a/src/CSET/operators/regrid.py +++ b/src/CSET/operators/regrid.py @@ -45,20 +45,21 @@ def regrid_onto_cube( Arguments ---------- toregrid: iris.cube | iris.cube.CubeList - An iris Cube of data to regrid, or multiple cubes to regrid in a CubeList. - A minimum requirement is that the cube(s) need to be 2D with a latitude, - longitude coordinates. + An iris Cube of data to regrid, or multiple cubes to regrid in a + CubeList. A minimum requirement is that the cube(s) need to be 2D with a + latitude, longitude coordinates. target: Cube - An iris cube of the data to regrid onto. It needs to be 2D with a latitude, - longitude coordinate. + An iris cube of the data to regrid onto. It needs to be 2D with a + latitude, longitude coordinate. method: str Method used to regrid onto, etc. Linear will use iris.analysis.Linear() Returns ------- iris.cube | iris.cube.CubeList - An iris cube of the data that has been regridded, or a CubeList of the cubes - that have been regridded in the same order they were passed in toregrid. + An iris cube of the data that has been regridded, or a CubeList of the + cubes that have been regridded in the same order they were passed in + toregrid. Raises ------ @@ -119,9 +120,9 @@ def regrid_onto_xyspacing( Parameters ---------- toregrid: iris.cube | iris.cube.CubeList - An iris cube of the data to regrid, or multiple cubes to regrid in a cubelist. - A minimum requirement is that the cube(s) need to be 2D with a latitude, - longitude coordinates. + An iris cube of the data to regrid, or multiple cubes to regrid in a + cubelist. A minimum requirement is that the cube(s) need to be 2D with a + latitude, longitude coordinates. xspacing: integer Spacing of points in longitude direction (could be degrees, meters etc.) yspacing: integer @@ -132,8 +133,9 @@ def regrid_onto_xyspacing( Returns ------- iris.cube | iris.cube.CubeList - An iris cube of the data that has been regridded, or a cubelist of the cubes - that have been regridded in the same order they were passed in toregrid. + An iris cube of the data that has been regridded, or a cubelist of the + cubes that have been regridded in the same order they were passed in + toregrid. 
Raises ------ @@ -197,28 +199,37 @@ def regrid_onto_xyspacing( def regrid_to_single_point( - incube: iris.cube.Cube, lat_pt: float, lon_pt: float, method: str, **kwargs + cube: iris.cube.Cube, + lat_pt: float, + lon_pt: float, + method: str, + boundary_margin: int = 8, + **kwargs, ) -> iris.cube.Cube: """Select data at a single point by longitude and latitude. - Selection of model grid point is performed by a regrid function, either selecting the - nearest gridpoint to the selected longitude and latitude values or using linear - interpolation across the surrounding points. + Selection of model grid point is performed by a regrid function, either + selecting the nearest gridpoint to the selected longitude and latitude + values or using linear interpolation across the surrounding points. Parameters ---------- - incube: Cube - An iris cube of the data to regrid. As a minimum, it needs to be 2D with latitude, - longitude coordinates. + cube: Cube + An iris cube of the data to regrid. As a minimum, it needs to be 2D with + latitude, longitude coordinates. lon_pt: float Selected value of longitude. lat_pt: float Selected value of latitude. method: str - Method used to determine the values at the selected longitude and latitude. - The recommended approach is to use iris.analysis.Nearest(), which selects the - nearest gridpoint. An alternative is iris.analysis.Linear(), which obtains - the values at the selected longitude and latitude by linear interpolation. + Method used to determine the values at the selected longitude and + latitude. The recommended approach is to use iris.analysis.Nearest(), + which selects the nearest gridpoint. An alternative is + iris.analysis.Linear(), which obtains the values at the selected + longitude and latitude by linear interpolation. + boundary_margin: int, optional + Number of grid points from the domain boundary considered "unreliable". + Defaults to 8. Returns ------- @@ -229,39 +240,40 @@ def regrid_to_single_point( Raises ------ ValueError - If a unique x/y coordinate cannot be found; also if, for selecting a single - gridpoint, the chosen longitude and latitude point is outside the domain. + If a unique x/y coordinate cannot be found; also if, for selecting a + single gridpoint, the chosen longitude and latitude point is outside the + domain. NotImplementedError If the cubes grid, or the method for regridding, is not yet supported. Notes ----- - The acceptable coordinate names for X and Y coordinates are currently described - in X_COORD_NAMES and Y_COORD_NAMES. These cover commonly used coordinate types, - though a user can append new ones. - Currently rectilinear grids (uniform) are supported. - Warnings are raised if the selected gridpoint is within eight gridlengths of the - domain boundary as data here is potentially unreliable. + The acceptable coordinate names for X and Y coordinates are currently + described in X_COORD_NAMES and Y_COORD_NAMES. These cover commonly used + coordinate types, though a user can append new ones. Currently rectilinear + grids (uniform) are supported. Warnings are raised if the selected gridpoint + is within boundary_margin grid lengths of the domain boundary as data here + is potentially unreliable. """ # Get x and y coordinate names. 
- y_coord, x_coord = get_cube_yxcoordname(incube) + y_coord, x_coord = get_cube_yxcoordname(cube) # List of supported grids - check if it is compatible # NOTE: The "RotatedGeogCS" option below seems to be required for rotated grids -- # this may need to be added in other places in these Operators. supported_grids = (iris.coord_systems.GeogCS, iris.coord_systems.RotatedGeogCS) - if not isinstance(incube.coord(x_coord).coord_system, supported_grids): + if not isinstance(cube.coord(x_coord).coord_system, supported_grids): raise NotImplementedError( - f"Does not currently support {incube.coord(x_coord).coord_system} regrid method" + f"Does not currently support {cube.coord(x_coord).coord_system} regrid method" ) - if not isinstance(incube.coord(y_coord).coord_system, supported_grids): + if not isinstance(cube.coord(y_coord).coord_system, supported_grids): raise NotImplementedError( - f"Does not currently support {incube.coord(y_coord).coord_system} regrid method" + f"Does not currently support {cube.coord(y_coord).coord_system} regrid method" ) # Get axis - lat, lon = incube.coord(y_coord), incube.coord(x_coord) + lat, lon = cube.coord(y_coord), cube.coord(x_coord) # Get bounds lat_min, lon_min = lat.points.min(), lon.points.min() @@ -270,8 +282,14 @@ def regrid_to_single_point( # Get bounds # Boundaries of frame to avoid selecting gridpoint close to domain edge # Currently hardwired to 8 but could be a user input - lat_min_bound, lon_min_bound = lat.points[7], lon.points[7] - lat_max_bound, lon_max_bound = lat.points[-8], lon.points[-8] + lat_min_bound, lon_min_bound = ( + lat.points[boundary_margin - 1], + lon.points[boundary_margin - 1], + ) + lat_max_bound, lon_max_bound = ( + lat.points[-boundary_margin], + lon.points[-boundary_margin], + ) # Check to see if selected point is outside the domain if ( @@ -281,21 +299,20 @@ def regrid_to_single_point( or (lon_pt > lon_max) ): raise ValueError("Selected point is outside the domain.") - else: - if ( - (lat_pt < lat_min_bound) - or (lat_pt > lat_max_bound) - or (lon_pt < lon_min_bound) - or (lon_pt > lon_max_bound) - ): - warnings.warn( - "Selected point is within 8 gridlengths of the domain edge.", - category=BoundaryWarning, - stacklevel=2, - ) + elif ( + (lat_pt < lat_min_bound) + or (lat_pt > lat_max_bound) + or (lon_pt < lon_min_bound) + or (lon_pt > lon_max_bound) + ): + warnings.warn( + f"Selected point is within {boundary_margin} gridlengths of the domain edge, data may be unreliable.", + category=BoundaryWarning, + stacklevel=2, + ) regrid_method = getattr(iris.analysis, method, None) if not callable(regrid_method): raise NotImplementedError(f"Does not currently support {method} regrid method") - cube_rgd = incube.interpolate(((lat, lat_pt), (lon, lon_pt)), regrid_method()) + cube_rgd = cube.interpolate(((lat, lat_pt), (lon, lon_pt)), regrid_method()) return cube_rgd diff --git a/tests/operators/test_regrid.py b/tests/operators/test_regrid.py index 4312b799e..b2881ac9b 100644 --- a/tests/operators/test_regrid.py +++ b/tests/operators/test_regrid.py @@ -160,14 +160,13 @@ def test_regrid_to_single_point(cube): assert repr(regrid_cube) == expected_cube -@pytest.mark.filterwarnings("ignore:Selected point is within") def test_regrid_to_single_point_missing_coord(cube): """Missing coordinate raises error.""" # Missing X coordinate. 
source = cube.copy()
     source.remove_coord("grid_longitude")
     with pytest.raises(ValueError):
-        regrid.regrid_to_single_point(source, 0.5, 358.5, "Nearest")
+        regrid.regrid_to_single_point(source, 0.5, 358.5, "Nearest", boundary_margin=0)
 
     # Missing Y coordinate.
     source = cube.copy()
diff --git a/tests/test_run_recipes.py b/tests/test_execute_recipe.py
similarity index 100%
rename from tests/test_run_recipes.py
rename to tests/test_execute_recipe.py
diff --git a/tests/workflow_utils/test_run_cset_recipe.py b/tests/workflow_utils/test_run_cset_recipe.py
index 8ae372a92..7519123df 100644
--- a/tests/workflow_utils/test_run_cset_recipe.py
+++ b/tests/workflow_utils/test_run_cset_recipe.py
@@ -118,3 +118,35 @@ def assert_true():
     monkeypatch.setattr(run_cset_recipe, "run_recipe_steps", assert_true)
     run_cset_recipe.run()
     assert function_ran, "Function did not run!"
+
+
+def test_run_recipe_steps(monkeypatch, tmp_working_dir):
+    """Test run recipe steps correctly runs CSET and creates an archive."""
+
+    def mock_func(*args, **kwargs):
+        pass
+
+    monkeypatch.setattr(subprocess, "run", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "create_diagnostic_archive", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "recipe_file", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "output_directory", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "data_directory", mock_func)
+    run_cset_recipe.run_recipe_steps()
+
+
+def test_run_recipe_steps_exception(monkeypatch, tmp_working_dir):
+    """Test run recipe steps correctly raises exception on cset bake error."""
+
+    def mock_subprocess_run(*args, **kwargs):
+        raise subprocess.CalledProcessError(1, args, b"", b"")
+
+    def mock_func(*args, **kwargs):
+        pass
+
+    monkeypatch.setattr(subprocess, "run", mock_subprocess_run)
+    monkeypatch.setattr(run_cset_recipe, "create_diagnostic_archive", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "recipe_file", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "output_directory", mock_func)
+    monkeypatch.setattr(run_cset_recipe, "data_directory", mock_func)
+    with pytest.raises(subprocess.CalledProcessError):
+        run_cset_recipe.run_recipe_steps()

From d7c997b1b6f913f737e40b9df25633f7714e54a2 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Fri, 23 Aug 2024 12:39:19 +0100
Subject: [PATCH 35/90] Convert some references to parallel to steps

---
 src/CSET/_common.py  |  4 ++--
 tests/test_cli.py    |  2 +-
 tests/test_common.py | 30 ++++++++++++++++--------------
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/CSET/_common.py b/src/CSET/_common.py
index 6535aff4f..3de082ce0 100644
--- a/src/CSET/_common.py
+++ b/src/CSET/_common.py
@@ -57,7 +57,7 @@ def parse_recipe(recipe_yaml: Union[Path, str], variables: dict = None):
     Examples
     --------
     >>> CSET._common.parse_recipe(Path("myrecipe.yaml"))
-    {'parallel': [{'operator': 'misc.noop'}]}
+    {'steps': [{'operator': 'misc.noop'}]}
     """
     # Ensure recipe_yaml is something the YAML parser can read.
     if isinstance(recipe_yaml, str):
@@ -87,7 +87,7 @@ def check_recipe_has_steps(recipe: dict):
     """Check a recipe has the minimum required steps.
 
     Checking that the recipe actually has some steps, and providing helpful
-    error messages otherwise. We must have at least a parallel step, as that
+    error messages otherwise. We must have at least one step, as that
     reads the raw data. 
Parameters diff --git a/tests/test_cli.py b/tests/test_cli.py index 2834fb4c0..f422316e6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -69,7 +69,7 @@ def test_bake_invalid_args(): def test_bake_invalid_args_input_dir(): - """Missing required input-dir argument for parallel.""" + """Missing required input-dir argument for bake.""" with pytest.raises(subprocess.CalledProcessError): subprocess.run( ["cset", "bake", "--recipe=foo", "--output-dir=/tmp"], check=True diff --git a/tests/test_common.py b/tests/test_common.py index 5f55c0897..5b8b728d5 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -81,16 +81,16 @@ def test_parse_recipe_exception_blank(): common.parse_recipe("") -def test_parse_recipe_exception_no_parallel(): - """Exception for recipe without any parallel steps.""" +def test_parse_recipe_exception_no_steps(): + """Exception for recipe without any steps steps.""" with pytest.raises(ValueError): - common.parse_recipe("parallel: []") + common.parse_recipe("steps: []") -def test_parse_recipe_exception_parallel_not_sequence(): - """Exception for recipe with parallel containing an atom.""" +def test_parse_recipe_exception_steps_not_sequence(): + """Exception for recipe with steps containing an atom.""" with pytest.raises(ValueError): - common.parse_recipe("parallel: 7") + common.parse_recipe("steps: 7") def test_parse_recipe_exception_non_dict(): @@ -138,14 +138,22 @@ def test_parse_variable_options_quoted(): def test_template_variables(): """Multiple variables are correctly templated into recipe.""" - recipe = {"parallel": [{"operator": "misc.noop", "v1": "$VAR_A", "v2": "$VAR_B"}]} + recipe = { + "steps": [{"operator": "misc.noop", "v1": "$VAR_A", "v2": "$VAR_B", "v3": 0}] + } variables = {"VAR_A": 42, "VAR_B": 3.14} - expected = {"parallel": [{"operator": "misc.noop", "v1": 42, "v2": 3.14}]} + expected = {"steps": [{"operator": "misc.noop", "v1": 42, "v2": 3.14, "v3": 0}]} actual = common.template_variables(recipe, variables) assert actual == expected assert recipe == expected +def test_template_variables_wrong_recipe_type(): + """Give wrong type for recipe.""" + with pytest.raises(TypeError): + common.template_variables(1, {}) + + def test_replace_template_variable(): """Placeholders are correctly substituted.""" # Test direct substitution. @@ -164,12 +172,6 @@ def test_replace_template_variable(): common.replace_template_variable("$VAR", {}) -def test_template_variables_wrong_recipe_type(): - """Give wrong type for recipe.""" - with pytest.raises(TypeError): - common.template_variables(1, {}) - - def test_get_recipe_meta(tmp_working_dir): """Reading metadata from disk.""" # Default for missing file. 
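Taken together, these patches leave recipes with a single "steps" list in place
of the old "parallel" and "collate" sections. For reference, a minimal sketch of
parsing such a recipe through CSET._common.parse_recipe, based only on the
recipes and error messages exercised by the tests in this patch (the recipe text
and the import alias are illustrative):

    from CSET import _common as common

    recipe_yaml = """\
    title: Example recipe
    steps:
      - operator: misc.noop
        arg1: Hello
    """
    # parse_recipe accepts either a YAML string or a Path to a recipe file.
    recipe = common.parse_recipe(recipe_yaml)
    assert recipe["steps"] == [{"operator": "misc.noop", "arg1": "Hello"}]

    # A missing or empty steps list is rejected during parsing.
    try:
        common.parse_recipe("steps: []")
    except ValueError as err:
        print(err)  # e.g. "Recipe must have at least 1 step..."
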
From 1948f0572bdab73eb482098d92b1c0e1b4326627 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 12:44:08 +0100 Subject: [PATCH 36/90] Make recipe validation tests more specific --- tests/test_common.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_common.py b/tests/test_common.py index 5b8b728d5..358996863 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -59,43 +59,45 @@ def test_parse_recipe_exception_missing(): def test_parse_recipe_exception_type(): """Exception for incorrect type.""" - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="recipe_yaml must be a str or Path."): common.parse_recipe(True) def test_parse_recipe_exception_invalid_yaml(): """Exception for invalid YAML.""" - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="ParserError: Invalid YAML"): common.parse_recipe('"Inside quotes" outside of quotes') def test_parse_recipe_exception_invalid_recipe(): """Exception for valid YAML but invalid recipe.""" - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Recipe must contain a 'steps' key."): common.parse_recipe("a: 1") def test_parse_recipe_exception_blank(): """Exception for blank recipe.""" - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="Recipe must contain a mapping."): common.parse_recipe("") def test_parse_recipe_exception_no_steps(): """Exception for recipe without any steps steps.""" - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Recipe must have at least 1 step"): common.parse_recipe("steps: []") def test_parse_recipe_exception_steps_not_sequence(): """Exception for recipe with steps containing an atom.""" - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match="'steps' key must contain a sequence of steps." + ): common.parse_recipe("steps: 7") def test_parse_recipe_exception_non_dict(): """Exception for recipe that parses to a non-dict.""" - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="Recipe must contain a mapping."): common.parse_recipe("[]") From 1c2328ec92b08dc1f97ede1c91aa90330f60b9cd Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 13:09:28 +0100 Subject: [PATCH 37/90] Remove old version of fetch_data --- .../_workflow_utils/fetch_data_filesystem.py | 67 ------ .../_workflow_utils/validity_time_tester.py | 223 ------------------ src/CSET/operators/regrid.py | 4 +- tests/workflow_utils/test_fetch_data.py | 5 +- 4 files changed, 3 insertions(+), 296 deletions(-) delete mode 100755 src/CSET/_workflow_utils/fetch_data_filesystem.py delete mode 100755 src/CSET/_workflow_utils/validity_time_tester.py diff --git a/src/CSET/_workflow_utils/fetch_data_filesystem.py b/src/CSET/_workflow_utils/fetch_data_filesystem.py deleted file mode 100755 index d50eb0dc0..000000000 --- a/src/CSET/_workflow_utils/fetch_data_filesystem.py +++ /dev/null @@ -1,67 +0,0 @@ -#! 
/usr/bin/env python3 - -"""Retrieve the files from the filesystem for the current cycle point.""" - -import glob -import logging -import os -import shutil -from datetime import datetime - -import isodate - -from CSET._workflow_utils import validity_time_tester - -logging.basicConfig( - level=os.getenv("LOGLEVEL", "INFO"), format="%(asctime)s %(levelname)s %(message)s" -) - -# os.getenv("DATA_SOURCE") - - -def template_file_path(): - """Fill time placeholders to generate a file path to fetch.""" - raw_path = os.environ["DATA_PATH"] - date_type = os.environ["DATE_TYPE"] - data_period = isodate.parse_duration(os.getenv("DATA_PERIOD")) - data_time = datetime.fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"]) - forecast_length = isodate.parse_duration(os.environ["CSET_VERIFICATION_PERIOD"]) - forecast_offset = isodate.parse_duration(os.environ["CSET_VERIFICATION_OFFSET"]) - - placeholder_times = set() - match date_type: - case "validity": - date = data_time - while date < data_time + forecast_length: - placeholder_times.add(date) - date += data_period - case "initiation": - placeholder_times.add(data_time) - case "lead": - # TODO: Figure out how we are doing lead time. - pass - case _: - raise ValueError(f"Invalid date type: {date_type}") - - -# Excluded from coverage temporarily as script has be rewritten when data time -# cycling lands. -def run(): # pragma: no cover - """Run workflow script.""" - cycle_share_data_dir = f"{os.getenv('CYLC_WORKFLOW_SHARE_DIR')}/cycle/{os.getenv('CYLC_TASK_CYCLE_POINT')}/data" - os.makedirs(cycle_share_data_dir, exist_ok=True) - if os.getenv("CSET_FILE_NAME_METADATA_PATTERN"): - test_filename = validity_time_tester.create_validity_time_tester( - pattern=os.getenv("CSET_FILE_NAME_METADATA_PATTERN"), - validity_time=os.getenv("CYLC_TASK_CYCLE_POINT"), - period_length=os.getenv("CSET_CYCLE_PERIOD"), - times_per_file=int(os.getenv("CSET_TIMES_PER_FILE")), - time_offset=int(os.getenv("CSET_FILE_TIME_OFFSET")), - ) - else: - # Let all non-empty filenames through. - test_filename = None - - for file in filter(test_filename, glob.iglob(os.getenv("CSET_INPUT_FILE_PATH"))): - logging.info("Copying %s", file) - shutil.copy(file, cycle_share_data_dir) diff --git a/src/CSET/_workflow_utils/validity_time_tester.py b/src/CSET/_workflow_utils/validity_time_tester.py deleted file mode 100755 index 16abcb036..000000000 --- a/src/CSET/_workflow_utils/validity_time_tester.py +++ /dev/null @@ -1,223 +0,0 @@ -#! /usr/bin/env python3 - -"""Development module for getting the correct file for a validity time.""" - -import logging -import re -from datetime import datetime, timedelta - -import isodate - -# This file is excluded from coverage testing as it will be removed when the -# data time cycling is merged. - - -def word_month_to_num(month: str) -> int: # pragma: no cover - """Convert a string month into the corresponding number. - - E.g. "January" -> 1, "feb" -> 2. - - Returns - ------- - KeyError: - If month is not a valid month name. - """ - month_mappings = { - "jan": 1, - "feb": 2, - "mar": 3, - "apr": 4, - "may": 5, - "jun": 6, - "jul": 7, - "aug": 8, - "sep": 9, - "oct": 10, - "nov": 11, - "dec": 12, - } - # Leave exceptions to caller. 
- month_number = month_mappings[month.lower()[:3]] - return month_number - - -def validity_time_direct(times: dict) -> datetime: # pragma: no cover - """Extract the validity time directly.""" - try: - month = times["valid_month"] - except KeyError: - month = word_month_to_num(times["valid_word_month"]) - validity_time = datetime( - int(times["valid_year"]), - int(month), - int(times["valid_day"]), - int(times.get("valid_minute", 0)), - int(times.get("valid_hour", 0)), - ) - return validity_time - - -def validity_time_from_init_time(times: dict) -> datetime: # pragma: no cover - """Derive the validity time from the initiation time and lead time.""" - try: - month = times["init_month"] - except KeyError: - month = word_month_to_num(times["init_word_month"]) - initiation_time = datetime( - int(times["init_year"]), - int(month), - int(times["init_day"]), - int(times.get("init_hour", 0)), - int(times.get("init_minute", 0)), - ) - lead_time = timedelta(hours=int(times["lead_hour"])) - validity_time = initiation_time + lead_time - return validity_time - - -def all_validity_info(pattern: str) -> bool: # pragma: no cover - """Check the validity time is present.""" - return ( - "{valid_year}" in pattern - and ("{valid_month}" in pattern or "{valid_word_month}" in pattern) - and "valid_day" in pattern - ) - - -def all_init_info(pattern: str) -> bool: # pragma: no cover - """Check the initiation time and lead time are present.""" - return ( - "{init_year}" in pattern - and ("{init_month}" in pattern or "{init_word_month}" in pattern) - and "{init_day}" in pattern - and "{lead_hour}" in pattern - ) - - -def create_validity_time_tester( - pattern: str, - validity_time: str, - period_length: str, - times_per_file: int, - time_offset: int, -) -> callable: # pragma: no cover - """Get a function to test if a filename contains a certain validity time. - - Parameters - ---------- - pattern: str - The pattern of the filename, with time information marked. - validity_time: str - ISO 8601 datetime string of the desired validity time. Any timezone - information are removed, and it is used as a naive datetime. - period_length: str - The length of time between time values in the file as an ISO 8601 - duration. - times_per_file: int - The number of validity times per file. A positive number indicates the - data is after the indicated time, and a negative number indicates the - data is before. - time_offset: int - Indicates the offset in time periods between the marked validity time - and the earliest time in the file. E.g. if the filename was T06, then +2 - would mean the first contained time was T04, while -2 would mean the - first time was T08. - - Returns - ------- - test_function: callable - A function that tests a filename and returns True when the validity time - is contained, and False when not. - - Notes - ----- - The pattern format is the filename with a number of placeholders added to - mark where the time information is. You must have enough information to - get the validity time, either directly from the validity time, or derived - from the initiation time and lead time. 
- - Validity time placeholders: - * ``{valid_year}`` - * ``{valid_month}`` - * ``{valid_word_month}`` - * ``{valid_day}`` - * ``{valid_hour}`` - * ``{valid_minute}`` - - Initiation time placeholders: - * ``{init_year}`` - * ``{init_month}`` Numeric month, e.g: 02 - * ``{init_word_month}`` Wordy month, e.g: feb - * ``{init_day}`` - * ``{init_hour}`` - * ``{init_minute}`` - * ``{lead_hour}`` - """ - # Check that the pattern has sufficient information. - logging.debug("Original pattern: %s", pattern) - if all_validity_info(pattern): - logging.info("Taking validity time directly from filename.") - calc_validity_time = validity_time_direct - elif all_init_info(pattern): - logging.info("Deriving validity time from initialisation time and lead time.") - calc_validity_time = validity_time_from_init_time - else: - raise ValueError( - "Not enough information to determine validity time in pattern." - ) - - # Construct a regex for capturing the desired information. - replacements = { - # "old": "new", - "{init_year}": r"(?P[0-9]{4})", - "{init_month}": r"(?P[0-9]{2})", - "{init_word_month}": r"(?P[a-zA-Z]{3,9})", - "{init_day}": r"(?P[0-9]{2})", - "{init_hour}": r"(?P[0-9]{2})", - "{init_minute}": r"(?P[0-9]{2})", - "{valid_year}": r"(?P[0-9]{4})", - "{valid_month}": r"(?P[0-9]{2})", - "{valid_word_month}": r"(?P[a-zA-Z]{3,9})", - "{valid_day}": r"(?P[0-9]{2})", - "{valid_hour}": r"(?P[0-9]{2})", - "{valid_minute}": r"(?P[0-9]{2})", - "{lead_hour}": r"(?P[0-9]{2,3})", - } - for key in replacements: - pattern = pattern.replace(key, replacements[key]) - pattern = r".*/?" + pattern - logging.info("Regex: %s", pattern) - - # After converting to datetime remove the timezone so we can just compare - # naive dates for ease. Only one timezone should be used in a set of files. - target_validity_time = datetime.fromisoformat(validity_time).replace(tzinfo=None) - period_duration = isodate.parse_duration(period_length) - start_offset = time_offset * period_duration - end_offset = times_per_file * period_duration - - def test_function(filename: str) -> bool: - """Whether the filename contains the validity time.""" - match = re.match(pattern, filename) - if match is None: - logging.debug("Filename did not match pattern. %s", filename) - return False - times = match.groupdict() - logging.debug("Extracted times: %s", times) - file_time_start = calc_validity_time(times) - start_offset - file_time_end = file_time_start + end_offset - # Extra logic to handle case where first file in a forecast has extra - # time steps. - if ( - target_validity_time < file_time_start - and int(times.get("lead_hour", 1)) == 0 - ): - logging.info( - "Initial forecast file may have extra time steps. Retaining despite appearing before file start." 
- ) - return True - return ( - file_time_start <= target_validity_time - and target_validity_time < file_time_end - ) - - return test_function diff --git a/src/CSET/operators/regrid.py b/src/CSET/operators/regrid.py index cd5b66f6d..61899e64a 100644 --- a/src/CSET/operators/regrid.py +++ b/src/CSET/operators/regrid.py @@ -279,9 +279,7 @@ def regrid_to_single_point( lat_min, lon_min = lat.points.min(), lon.points.min() lat_max, lon_max = lat.points.max(), lon.points.max() - # Get bounds - # Boundaries of frame to avoid selecting gridpoint close to domain edge - # Currently hardwired to 8 but could be a user input + # Get boundaries of frame to avoid selecting gridpoint close to domain edge lat_min_bound, lon_min_bound = ( lat.points[boundary_margin - 1], lon.points[boundary_margin - 1], diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index eceb44114..dd43b016c 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -14,11 +14,10 @@ """Tests for fetch_data workflow utility.""" -from CSET._workflow_utils import fetch_data_filesystem, validity_time_tester +from CSET._workflow_utils import fetch_data def test_function_exists(): """Placeholder tests before rewriting fetch data utility.""" # TODO: Write tests after switching to new fetch_data utility. - assert callable(fetch_data_filesystem.run) - assert callable(validity_time_tester.create_validity_time_tester) + assert callable(fetch_data.fetch_data) From 76ba1497faa792a23c8a891cad651f52ebbdbbf1 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 15:55:57 +0100 Subject: [PATCH 38/90] Add fetch_data tests --- src/CSET/_workflow_utils/fetch_data.py | 85 +++++++++----- tests/workflow_utils/test_fetch_data.py | 148 +++++++++++++++++++++++- 2 files changed, 201 insertions(+), 32 deletions(-) diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index c4110fc42..1beee8c9f 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -9,6 +9,7 @@ import shutil from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta +from typing import Literal import isodate @@ -39,7 +40,7 @@ def __exit__(self, exc_type, exc_value, traceback): logging.debug("Tearing down FileRetriever.") @abc.abstractmethod - def get_file(self, file_path: str, output_dir: str) -> None: + def get_file(self, file_path: str, output_dir: str) -> None: # pragma: no cover """Save a file from the data source to the output directory. Not all of the given paths will exist, so FileNotFoundErrors should be @@ -82,20 +83,38 @@ def get_file(self, file_path: str, output_dir: str) -> None: logging.warning("Failed to copy %s, error: %s", file, err) -def _template_file_path(): +def _get_needed_environment_variables() -> dict: + """Load the needed variables from the environment.""" + variables = { + "raw_path": os.environ["DATA_PATH"], + "date_type": os.environ["DATE_TYPE"], + "data_time": datetime.fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"]), + "forecast_length": isodate.parse_duration(os.environ["CSET_ANALYSIS_PERIOD"]), + "forecast_offset": isodate.parse_duration(os.environ["CSET_ANALYSIS_OFFSET"]), + "share_dir": os.environ["CYLC_WORKFLOW_SHARE_DIR"], + "cycle_point": os.environ["CYLC_TASK_CYCLE_POINT"], + "model_number": os.environ["MODEL_NUMBER"], + } + # Data period is not needed for initiation time. 
+ if variables["date_type"] != "initiation": + variables["data_period"] = isodate.parse_duration(os.environ["DATA_PERIOD"]) + return variables + + +def _template_file_path( + raw_path: str, + date_type: Literal["validity", "initiation", "lead"], + data_time: datetime, + forecast_length: timedelta, + forecast_offset: timedelta, + data_period: timedelta, +) -> list[str]: """Fill time placeholders to generate a file path to fetch.""" - raw_path = os.environ["DATA_PATH"] - date_type = os.environ["DATE_TYPE"] - data_time = datetime.fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"]) - forecast_length = isodate.parse_duration(os.environ["CSET_ANALYSIS_PERIOD"]) - forecast_offset = isodate.parse_duration(os.environ["CSET_ANALYSIS_OFFSET"]) - placeholder_times: list[datetime] = [] lead_times: list[timedelta] = [] match date_type: case "validity": date = data_time - data_period = isodate.parse_duration(os.getenv("DATA_PERIOD")) while date < data_time + forecast_length: placeholder_times.append(date) date += data_period @@ -103,7 +122,6 @@ def _template_file_path(): placeholder_times.append(data_time) case "lead": placeholder_times.append(data_time) - data_period = isodate.parse_duration(os.getenv("DATA_PERIOD")) lead_time = forecast_offset while lead_time < forecast_length: lead_times.append(lead_time) @@ -115,43 +133,54 @@ def _template_file_path(): for placeholder_time in placeholder_times: # Expand out all other format strings. path = placeholder_time.strftime(os.path.expandvars(raw_path)) - - # Expand out lead time format strings, %N. - for lead_time in lead_times: - # BUG: Will not respect escaped % signs, e.g: %%N. - paths.append( - path.replace("%N", f"{int(lead_time.total_seconds()) // 3600:03d}") - ) + if lead_times: + # Expand out lead time format strings, %N. + for lead_time in lead_times: + # BUG: Will not respect escaped % signs, e.g: %%N. + paths.append( + path.replace("%N", f"{int(lead_time.total_seconds()) // 3600:03d}") + ) else: paths.append(path) return paths def fetch_data(file_retriever: FileRetriever = FilesystemFileRetriever): - """Fetch the model's data. + """Fetch the data for a model. The following environment variables need to be set: - * CSET_ANALYSIS_OFFSET - * CSET_ANALYSIS_PERIOD - * CYLC_TASK_CYCLE_POINT - * DATA_PATH - * DATA_PERIOD - If DATE_TYPE is not 'initialisation' - * DATE_TYPE - * MODEL_NUMBER + * CSET_ANALYSIS_OFFSET + * CSET_ANALYSIS_PERIOD + * CYLC_TASK_CYCLE_POINT + * CYLC_WORKFLOW_SHARE_DIR + * DATA_PATH + * DATA_PERIOD + * DATE_TYPE + * MODEL_NUMBER Parameters ---------- file_retriever: FileRetriever FileRetriever implementation to use. Defaults to FilesystemFileRetriever. """ + v = _get_needed_environment_variables() + # Prepare output directory. - model_number = os.getenv("MODEL_NUMBER") - cycle_share_data_dir = f"{os.getenv('CYLC_WORKFLOW_SHARE_DIR')}/cycle/{os.getenv('CYLC_TASK_CYCLE_POINT')}/data/{model_number}" + cycle_share_data_dir = ( + f"{v['share_dir']}/cycle/{v['cycle_point']}/data/{v['model_number']}" + ) os.makedirs(cycle_share_data_dir, exist_ok=True) logging.debug("Output directory: %s", cycle_share_data_dir) # Get file paths. - paths = _template_file_path() + paths = _template_file_path( + v["raw_path"], + v["date_type"], + v["data_time"], + v["forecast_length"], + v["forecast_offset"], + v["data_period"], + ) logging.info("Retrieving paths:\n%s", "\n".join(paths)) # Use file retriever to transfer data with multiple threads. 
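As the tests that follow show, fetch_data is handed the retriever class itself
and instantiates it, so other data sources can be plugged in by subclassing
FileRetriever. A minimal sketch under that assumption; the DummyFileRetriever
name and its print-only body are illustrative, not part of CSET:

    from CSET._workflow_utils.fetch_data import FileRetriever, fetch_data

    class DummyFileRetriever(FileRetriever):
        """Retriever that only logs the paths it is asked for."""

        def get_file(self, file_path: str, output_dir: str) -> None:
            # A real implementation would transfer file_path into output_dir,
            # tolerating templated paths that match no actual files.
            print(f"would fetch {file_path} -> {output_dir}")

    # With the environment variables listed in the fetch_data docstring set:
    # fetch_data(file_retriever=DummyFileRetriever)
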
diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index dd43b016c..7feeeaede 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -14,10 +14,150 @@ """Tests for fetch_data workflow utility.""" +from datetime import UTC, datetime, timedelta + +import pytest + from CSET._workflow_utils import fetch_data -def test_function_exists(): - """Placeholder tests before rewriting fetch data utility.""" - # TODO: Write tests after switching to new fetch_data utility. - assert callable(fetch_data.fetch_data) +def test_get_needed_environment_variables(monkeypatch): + """Needed environment variables are loaded.""" + duration_raw = "PT1H" + duration = timedelta(hours=1) + date_raw = "20000101T0000Z" + date = datetime(2000, 1, 1, 0, 0, tzinfo=UTC) + path = "/path/to/data" + number_raw = "1" + + monkeypatch.setenv("CSET_ANALYSIS_OFFSET", duration_raw) + monkeypatch.setenv("CSET_ANALYSIS_PERIOD", duration_raw) + monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", date_raw) + monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", path) + monkeypatch.setenv("DATA_PATH", path) + monkeypatch.setenv("DATA_PERIOD", duration_raw) + monkeypatch.setenv("MODEL_NUMBER", number_raw) + monkeypatch.setenv("DATE_TYPE", "validity") + + expected = { + "cycle_point": date_raw, + "data_period": duration, + "data_time": date, + "date_type": "validity", + "forecast_length": duration, + "forecast_offset": duration, + "model_number": number_raw, + "raw_path": path, + "share_dir": path, + } + actual = fetch_data._get_needed_environment_variables() + assert actual == expected + + # Check DATA_PERIOD is not there for initiation. + monkeypatch.setenv("DATE_TYPE", "initiation") + initiation_actual = fetch_data._get_needed_environment_variables() + assert "data_period" not in initiation_actual + + +def test_fetch_data(monkeypatch, tmp_path): + """Test top-level fetch_data function with other calls mocked out.""" + + def mock_get_needed_environment_variables(): + return { + "share_dir": str(tmp_path), + "cycle_point": "20000101T0000Z", + "model_number": "1", + "raw_path": None, + "date_type": None, + "data_time": None, + "forecast_length": None, + "forecast_offset": None, + "data_period": None, + } + + def mock_template_file_path(*args, **kwargs): + return [f"path_{n}" for n in range(5)] + + files_gotten = False + + class MockFileRetriever(fetch_data.FileRetriever): + def get_file(self, file_path: str, output_dir: str) -> None: + nonlocal files_gotten + files_gotten = True + + monkeypatch.setattr( + fetch_data, + "_get_needed_environment_variables", + mock_get_needed_environment_variables, + ) + monkeypatch.setattr(fetch_data, "_template_file_path", mock_template_file_path) + fetch_data.fetch_data(MockFileRetriever) + assert files_gotten + + +def test_template_file_path_validity_time(): + """Test filling path placeholders for validity time.""" + actual = fetch_data._template_file_path( + "/path/%Y-%m-%d.nc", + "validity", + datetime(2000, 1, 1, tzinfo=UTC), + timedelta(days=5), + timedelta(), + timedelta(days=1), + ) + expected = [ + "/path/2000-01-01.nc", + "/path/2000-01-02.nc", + "/path/2000-01-03.nc", + "/path/2000-01-04.nc", + "/path/2000-01-05.nc", + ] + assert actual == expected + + +def test_template_file_path_initiation_time(): + """Test filling path placeholders for initiation time.""" + actual = fetch_data._template_file_path( + "/path/%Y-%m-%d.nc", + "initiation", + datetime(2000, 1, 1, tzinfo=UTC), + timedelta(days=5), + timedelta(), + None, + ) + 
expected = ["/path/2000-01-01.nc"] + assert actual == expected + + +def test_template_file_path_lead_time(): + """Test filling path placeholders for lead time.""" + actual = fetch_data._template_file_path( + "/path/%N.nc", + "lead", + datetime(2000, 1, 1, tzinfo=UTC), + timedelta(hours=5, seconds=1), + timedelta(hours=1), + timedelta(hours=1), + ) + expected = [ + "/path/001.nc", + "/path/002.nc", + "/path/003.nc", + "/path/004.nc", + "/path/005.nc", + ] + assert actual == expected + + +def test_template_file_path_invalid_date_type(): + """Test error on invalid date type.""" + with pytest.raises(ValueError, match="Invalid date type:"): + fetch_data._template_file_path(None, "Other", None, None, None, None) + + +def test_FilesystemFileRetriever(tmp_path): + """Test retrieving a file from the filesystem.""" + with fetch_data.FilesystemFileRetriever() as ffr: + ffr.get_file("tests/test_data/exeter_em*.nc", str(tmp_path)) + assert (tmp_path / "exeter_em01.nc").is_file() + assert (tmp_path / "exeter_em02.nc").is_file() From f56e8d595127aa07aae3a3a7c0458a22c5b121b5 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 16:02:10 +0100 Subject: [PATCH 39/90] Reference UTC timezone as datetime.timezone.utc The datetime.UTC alias was only added in python 3.11. --- tests/workflow_utils/test_fetch_data.py | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index 7feeeaede..cf657c4a1 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -14,7 +14,7 @@ """Tests for fetch_data workflow utility.""" -from datetime import UTC, datetime, timedelta +import datetime import pytest @@ -24,9 +24,9 @@ def test_get_needed_environment_variables(monkeypatch): """Needed environment variables are loaded.""" duration_raw = "PT1H" - duration = timedelta(hours=1) + duration = datetime.timedelta(hours=1) date_raw = "20000101T0000Z" - date = datetime(2000, 1, 1, 0, 0, tzinfo=UTC) + date = datetime.datetime(2000, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) path = "/path/to/data" number_raw = "1" @@ -100,10 +100,10 @@ def test_template_file_path_validity_time(): actual = fetch_data._template_file_path( "/path/%Y-%m-%d.nc", "validity", - datetime(2000, 1, 1, tzinfo=UTC), - timedelta(days=5), - timedelta(), - timedelta(days=1), + datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc), + datetime.timedelta(days=5), + datetime.timedelta(), + datetime.timedelta(days=1), ) expected = [ "/path/2000-01-01.nc", @@ -120,9 +120,9 @@ def test_template_file_path_initiation_time(): actual = fetch_data._template_file_path( "/path/%Y-%m-%d.nc", "initiation", - datetime(2000, 1, 1, tzinfo=UTC), - timedelta(days=5), - timedelta(), + datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc), + datetime.timedelta(days=5), + datetime.timedelta(), None, ) expected = ["/path/2000-01-01.nc"] @@ -134,10 +134,10 @@ def test_template_file_path_lead_time(): actual = fetch_data._template_file_path( "/path/%N.nc", "lead", - datetime(2000, 1, 1, tzinfo=UTC), - timedelta(hours=5, seconds=1), - timedelta(hours=1), - timedelta(hours=1), + datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc), + datetime.timedelta(hours=5, seconds=1), + datetime.timedelta(hours=1), + datetime.timedelta(hours=1), ) expected = [ "/path/001.nc", From e8d811713c150713fc6780f6490fd2c411d8fa2e Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 16:15:50 +0100 Subject: [PATCH 
40/90] Add workaround for old python not supporting ISO 8601 datetimes This should be removed when we drop python 3.10 support. --- src/CSET/_workflow_utils/fetch_data.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index 1beee8c9f..6813ac047 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -7,6 +7,7 @@ import logging import os import shutil +import sys from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta from typing import Literal @@ -85,10 +86,16 @@ def get_file(self, file_path: str, output_dir: str) -> None: def _get_needed_environment_variables() -> dict: """Load the needed variables from the environment.""" + # Python 3.10 and older don't fully support ISO 8601 datetime formats. + # TODO: Remove once we drop python 3.10. + if sys.version_info.minor < 11: + _fromisoformat = isodate.parse_datetime + else: + _fromisoformat = datetime.fromisoformat variables = { "raw_path": os.environ["DATA_PATH"], "date_type": os.environ["DATE_TYPE"], - "data_time": datetime.fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"]), + "data_time": _fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"]), "forecast_length": isodate.parse_duration(os.environ["CSET_ANALYSIS_PERIOD"]), "forecast_offset": isodate.parse_duration(os.environ["CSET_ANALYSIS_OFFSET"]), "share_dir": os.environ["CYLC_WORKFLOW_SHARE_DIR"], From 4ed68410eed7d5dd549009c4a5009cdb1dc42593 Mon Sep 17 00:00:00 2001 From: James Frost Date: Fri, 23 Aug 2024 17:10:07 +0100 Subject: [PATCH 41/90] Test a few corner cases of fetch_data --- src/CSET/_workflow_utils/fetch_data.py | 2 ++ tests/workflow_utils/test_fetch_data.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index 6813ac047..0e0e11a67 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -77,6 +77,8 @@ def get_file(self, file_path: str, output_dir: str) -> None: """ file_paths = glob.glob(os.path.expanduser(file_path)) logging.debug("Copying files:\n%s", "\n".join(file_paths)) + if not file_paths: + logging.warning("file_path does not match any files: %s", file_path) for file in file_paths: try: shutil.copy(file, output_dir) diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index cf657c4a1..34bf325c2 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -15,6 +15,7 @@ """Tests for fetch_data workflow utility.""" import datetime +from pathlib import Path import pytest @@ -161,3 +162,24 @@ def test_FilesystemFileRetriever(tmp_path): ffr.get_file("tests/test_data/exeter_em*.nc", str(tmp_path)) assert (tmp_path / "exeter_em01.nc").is_file() assert (tmp_path / "exeter_em02.nc").is_file() + + +def test_FilesystemFileRetriever_no_files(tmp_path, caplog): + """Test warning when no files match the requested path.""" + with fetch_data.FilesystemFileRetriever() as ffr: + # Should warn, but not error. 
+        ffr.get_file("/non-existent/file.nc", str(tmp_path))
+    log_record = caplog.records[0]
+    assert log_record.levelname == "WARNING"
+    assert log_record.message.startswith("file_path does not match any files:")
+
+
+def test_FilesystemFileRetriever_copy_error(caplog):
+    """Test warning when file copy errors."""
+    with fetch_data.FilesystemFileRetriever() as ffr:
+        # Please don't run as root.
+        ffr.get_file("tests/test_data/air_temp.nc", "/usr/bin")
+    assert not Path("/usr/bin/air_temp.nc").is_file()
+    log_record = caplog.records[0]
+    assert log_record.levelname == "WARNING"
+    assert log_record.message.startswith("Failed to copy")

From 9900aa110754db9359e3f1bc52a22a7f5118c90a Mon Sep 17 00:00:00 2001
From: James Frost
Date: Fri, 23 Aug 2024 17:16:27 +0100
Subject: [PATCH 42/90] Test area constraint edge case

---
 tests/operators/test_constraints.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/operators/test_constraints.py b/tests/operators/test_constraints.py
index e9ecf47be..77d300a0d 100644
--- a/tests/operators/test_constraints.py
+++ b/tests/operators/test_constraints.py
@@ -118,6 +118,13 @@ def test_generate_area_constraint():
     assert expected_area_constraint in repr(area_constraint)
 
 
+def test_generate_area_constraint_no_limits():
+    """Generate area constraint with no limits."""
+    area_constraint = constraints.generate_area_constraint(None, None, None, None)
+    expected_area_constraint = "Constraint()"
+    assert expected_area_constraint in repr(area_constraint)
+
+
 def test_combine_constraints():
     """Combine constraint."""
     stash_constraint = constraints.generate_stash_constraint("m01s03i236")

From 1915b6124fe4099e8b759adbc1822934647ed89c Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 27 Aug 2024 10:14:38 +0100
Subject: [PATCH 43/90] Fix typo in rose metadata

---
 cset-workflow/meta/diagnostics/rose-meta.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf
index e3d672d1a..06b1ec48a 100644
--- a/cset-workflow/meta/diagnostics/rose-meta.conf
+++ b/cset-workflow/meta/diagnostics/rose-meta.conf
@@ -210,7 +210,7 @@ type=python_boolean
 compulsory=true
 sort-key=2modellevel4
 
-[template variables=EXTRACT_

From: James Frost
Date: Tue, 27 Aug 2024 10:27:55 +0100
Subject: [PATCH 44/90] Handle missing DATA_PERIOD for initiation time

---
 src/CSET/_workflow_utils/fetch_data.py  | 8 ++++++--
 tests/workflow_utils/test_fetch_data.py | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py
index 0e0e11a67..27335fe34 100755
--- a/src/CSET/_workflow_utils/fetch_data.py
+++ b/src/CSET/_workflow_utils/fetch_data.py
@@ -104,9 +104,13 @@ def _get_needed_environment_variables() -> dict:
         "cycle_point": os.environ["CYLC_TASK_CYCLE_POINT"],
         "model_number": os.environ["MODEL_NUMBER"],
     }
-    # Data period is not needed for initiation time.
-    if variables["date_type"] != "initiation":
+    try:
         variables["data_period"] = isodate.parse_duration(os.environ["DATA_PERIOD"])
+    except KeyError:
+        # Data period is not needed for initiation time. 
+ if variables["date_type"] != "initiation": + raise + variables["data_period"] = None return variables diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index 34bf325c2..0a6434019 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -56,8 +56,9 @@ def test_get_needed_environment_variables(monkeypatch): # Check DATA_PERIOD is not there for initiation. monkeypatch.setenv("DATE_TYPE", "initiation") + monkeypatch.delenv("DATA_PERIOD") initiation_actual = fetch_data._get_needed_environment_variables() - assert "data_period" not in initiation_actual + assert initiation_actual["data_period"] is None def test_fetch_data(monkeypatch, tmp_path): From 68bdfd9ec26d44db1894438ef5c2ddf291da48f3 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 27 Aug 2024 10:57:27 +0100 Subject: [PATCH 45/90] Finish website on compute platform Now it touches the data to generate the index, it probably shouldn't be run on the cylc scheduler. --- cset-workflow/flow.cylc | 1 - 1 file changed, 1 deletion(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index f1efed676..3eb8b923a 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -159,7 +159,6 @@ URL = https://metoffice.github.io/CSET [[finish_website]] # Updates the workflow info in the web interface. - platform = localhost [[send_email]] # Send email to notify that the workflow is complete. From 67907718962d7a33bf7aec8bf5949dcf3b8b554e Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 27 Aug 2024 14:56:25 +0100 Subject: [PATCH 46/90] Remove outdated comment --- src/CSET/operators/plot.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py index 64ae0eca7..823bb5174 100644 --- a/src/CSET/operators/plot.py +++ b/src/CSET/operators/plot.py @@ -1029,9 +1029,7 @@ def plot_vertical_line_series( # Set the lower and upper limit for the x-axis to ensure all plots have same # range. This needs to read the whole cube over the range of the sequence - # and if applicable postage stamp coordinate. This only works if the - # plotting is done in the collate section of a recipe and not in the - # parallel section of a recipe. + # and if applicable postage stamp coordinate. vmin = np.floor(cube.data.min()) vmax = np.ceil(cube.data.max()) @@ -1243,9 +1241,7 @@ def plot_histogram_series( # Set the lower and upper limit for the colorbar to ensure all plots have # same range. This needs to read the whole cube over the range of the - # sequence and if applicable postage stamp coordinate. This only works if - # the plotting is done in the collate section of a recipe and not in the - # parallel section of a recipe. + # sequence and if applicable postage stamp coordinate. vmin = np.floor((cube.data.min())) vmax = np.ceil((cube.data.max())) From eb36acced3aa218689b7a4be2ba2166fb494158f Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 27 Aug 2024 14:57:00 +0100 Subject: [PATCH 47/90] Update cset bake CLI reference --- docs/source/reference/cli.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/reference/cli.rst b/docs/source/reference/cli.rst index a07e631cd..811842862 100644 --- a/docs/source/reference/cli.rst +++ b/docs/source/reference/cli.rst @@ -13,20 +13,20 @@ page. .. 
code-block:: text - usage: cset bake [-h] [-i INPUT_DIR] -o OUTPUT_DIR -r RECIPE [--pre-only | --post-only] [-s STYLE_FILE] + usage: cset bake [-h] -i INPUT_DIR -o OUTPUT_DIR -r RECIPE [-s STYLE_FILE] [--plot-resolution PLOT_RESOLUTION] options: - -h, --help show this help message and exit + -h, --help show this help message and exit -i INPUT_DIR, --input-dir INPUT_DIR directory containing input data -o OUTPUT_DIR, --output-dir OUTPUT_DIR directory to write output into -r RECIPE, --recipe RECIPE recipe file to read - --parallel-only only run parallel steps - --collate-only only run collation steps -s STYLE_FILE, --style-file STYLE_FILE colour bar definition to use + --plot-resolution PLOT_RESOLUTION + plotting resolution in dpi Here is an example to run a recipe making use of the templated variable ``VARNAME`` in the recipe. The '-v' is optional to give verbose output:: From 3063f32c1bbfb8f9689590dd1f4f51d486a12acb Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 27 Aug 2024 14:57:25 +0100 Subject: [PATCH 48/90] Remove parallel and collate steps We've now gone back to just steps. --- docs/source/getting-started/create-first-recipe.rst | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/source/getting-started/create-first-recipe.rst b/docs/source/getting-started/create-first-recipe.rst index d6a816522..999fdb313 100644 --- a/docs/source/getting-started/create-first-recipe.rst +++ b/docs/source/getting-started/create-first-recipe.rst @@ -75,11 +75,9 @@ Recipe Steps ------------ When baking you follow a recipe step-by-step, CSET does the same with its -recipes. The steps of the recipe are contained within one of two keys. The -``parallel`` key for independent tasks that process the raw data, and the -``collate`` key for sequential tasks that bring together the processed data into -the final output. Each block prefixed with a ``-`` (which makes a list in YAML) -is an individual step, and they are run in order from top to bottom. +recipes. The steps of the recipe are contained within the ``steps`` key. Each +block prefixed with a ``-`` (which makes a list in YAML) is an individual step, +and they are run in order from top to bottom. Each step has an ``operator`` key, which specifies which operator to use. A `complete list of operators is in the documentation`_, but for this tutorial we @@ -93,7 +91,7 @@ to the input data as its implicit input. .. code-block:: yaml - parallel: + steps: - operator: read.read_cubes Once we have read the data, we need to filter them down to the data we require @@ -157,7 +155,7 @@ After following this far your recipe should look like this: Extracts and plots the 1.5m air temperature from a file. The temperature is averaged across the time coordinate. 
- parallel: + steps: - operator: read.read_cubes - operator: filters.filter_cubes From 34a10698f0fc8739d43f76595f0d1a37c8ee6af4 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 27 Aug 2024 16:01:59 +0100 Subject: [PATCH 49/90] Update documentation to reflect changes --- docs/source/contributing/code-review.rst | 2 - docs/source/contributing/documentation.rst | 6 +- .../getting-started/recipe-graph-details.svg | 207 ++++++++---------- docs/source/getting-started/recipe-graph.svg | 197 +++++++---------- docs/source/getting-started/run-recipe.rst | 23 +- .../getting-started/visualise-recipe.rst | 27 ++- docs/source/reference/cli.rst | 2 +- docs/source/reference/recipe-format.rst | 108 ++++----- docs/source/usage/add-diagnostic.rst | 6 +- docs/source/usage/workflow-installation.rst | 17 +- 10 files changed, 264 insertions(+), 331 deletions(-) diff --git a/docs/source/contributing/code-review.rst b/docs/source/contributing/code-review.rst index 0265d94ac..6aaec0316 100644 --- a/docs/source/contributing/code-review.rst +++ b/docs/source/contributing/code-review.rst @@ -194,7 +194,6 @@ For a plotting routine, specific questions to consider include: * Is the labelling present and appropriate? * Is the plot legible? - Portability Review ------------------ @@ -202,5 +201,4 @@ Portability Review 🚧 Section under construction. 🚧 - Something about the portability review… diff --git a/docs/source/contributing/documentation.rst b/docs/source/contributing/documentation.rst index 96d128aa8..75ac1a2ac 100644 --- a/docs/source/contributing/documentation.rst +++ b/docs/source/contributing/documentation.rst @@ -10,8 +10,8 @@ maintain documentation within the version control system, and keep it up to date. The `Sphinx website`_ has a useful primer to using reStructuredText for documentation. -The documentation is organised intro sections following the `Divio documentation -system`_. +The documentation is organised intro sections following the `Diátaxis +documentation system`_. You can build the documentation of CSET with the following command: @@ -21,7 +21,7 @@ You can build the documentation of CSET with the following command: The documentation will be output to the docs/build directory. -.. _Divio documentation system: https://documentation.divio.com/ +.. 
_Diátaxis documentation system: https://diataxis.fr/ Docstrings ---------- diff --git a/docs/source/getting-started/recipe-graph-details.svg b/docs/source/getting-started/recipe-graph-details.svg index 58aec39f9..1e1d7c23b 100644 --- a/docs/source/getting-started/recipe-graph-details.svg +++ b/docs/source/getting-started/recipe-graph-details.svg @@ -1,157 +1,124 @@ - - - - - + + + + -0ea71716-f3ff-43be-b23c-43b6d9d1972a - -read.read_cubes +61b2e64b-9e35-49ee-9200-2f69705e8485 + +read.read_cubes +<filename_pattern: *.nc> - + + +409506e4-ebd1-4ba1-bcc1-7aa92d2720c7 + +filters.filter_cubes + + + +61b2e64b-9e35-49ee-9200-2f69705e8485->409506e4-ebd1-4ba1-bcc1-7aa92d2720c7 + + + + -c1be712d-e343-4b0f-956a-7d2427a4c17d - -filters.filter_cubes +7f9d1e48-4605-474c-9ee2-0ce468080dbb + +constraints.combine_constraints - + -0ea71716-f3ff-43be-b23c-43b6d9d1972a->c1be712d-e343-4b0f-956a-7d2427a4c17d - - +61b2e64b-9e35-49ee-9200-2f69705e8485->7f9d1e48-4605-474c-9ee2-0ce468080dbb + + - + -7ef64587-30ed-42f2-bdee-376c53d42ffc - -constraints.combine_constraints +8fba8eff-d474-4913-9457-ae6b0383c968 + +constraints.generate_var_constraint +<varname: m01s03i236> - + -0ea71716-f3ff-43be-b23c-43b6d9d1972a->7ef64587-30ed-42f2-bdee-376c53d42ffc - - +61b2e64b-9e35-49ee-9200-2f69705e8485->8fba8eff-d474-4913-9457-ae6b0383c968 + + - + -22b128ed-58f4-4a98-8ceb-cec0cd02f234 - -constraints.generate_stash_constraint -<stash: m01s03i236> +1c6346ce-d3f5-429c-b892-455afa0256ad + +constraints.generate_cell_methods_constraint +<cell_methods: []> - + -0ea71716-f3ff-43be-b23c-43b6d9d1972a->22b128ed-58f4-4a98-8ceb-cec0cd02f234 - - - - - -d97c190b-70ba-4c2b-a01b-4a89c9d8dec2 - -constraints.generate_cell_methods_constraint -<cell_methods: []> - - - -0ea71716-f3ff-43be-b23c-43b6d9d1972a->d97c190b-70ba-4c2b-a01b-4a89c9d8dec2 - - +61b2e64b-9e35-49ee-9200-2f69705e8485->1c6346ce-d3f5-429c-b892-455afa0256ad + + START - -START + +START - + -START->0ea71716-f3ff-43be-b23c-43b6d9d1972a - - +START->61b2e64b-9e35-49ee-9200-2f69705e8485 + + - - -b6d3536a-27d8-4882-b103-709c18c3eb49 - -constraints.generate_stash_constraint -<stash: m01s03i236> + + +a5185878-23bc-4a6d-b7b8-fe0938e8287f + +plot.spatial_pcolormesh_plot - - -START->b6d3536a-27d8-4882-b103-709c18c3eb49 - - + + +409506e4-ebd1-4ba1-bcc1-7aa92d2720c7->a5185878-23bc-4a6d-b7b8-fe0938e8287f + + - + -b6d3536a-27d8-4882-b103-709c18c3eb49->0ea71716-f3ff-43be-b23c-43b6d9d1972a - - +7f9d1e48-4605-474c-9ee2-0ce468080dbb->409506e4-ebd1-4ba1-bcc1-7aa92d2720c7 + + - - -e1451adc-5e2a-46d6-8950-2e2fa1f0c364 - -collapse.collapse -<coordinate: time> -<method: MEAN> - - + -c1be712d-e343-4b0f-956a-7d2427a4c17d->e1451adc-5e2a-46d6-8950-2e2fa1f0c364 - - +8fba8eff-d474-4913-9457-ae6b0383c968->7f9d1e48-4605-474c-9ee2-0ce468080dbb + + - + -7ef64587-30ed-42f2-bdee-376c53d42ffc->c1be712d-e343-4b0f-956a-7d2427a4c17d - - +1c6346ce-d3f5-429c-b892-455afa0256ad->7f9d1e48-4605-474c-9ee2-0ce468080dbb + + + + + +d044b826-16ab-4caa-8143-ce633218e124 + +write.write_cube_to_nc +<overwrite: True> - + -22b128ed-58f4-4a98-8ceb-cec0cd02f234->7ef64587-30ed-42f2-bdee-376c53d42ffc - - - - - -d97c190b-70ba-4c2b-a01b-4a89c9d8dec2->7ef64587-30ed-42f2-bdee-376c53d42ffc - - - - - -5f951fe0-1b14-4816-87a4-3faf03dd2eaa - -plot.spatial_contour_plot -<file_path: CSET_OUTPUT_PATH> - - - -e1451adc-5e2a-46d6-8950-2e2fa1f0c364->5f951fe0-1b14-4816-87a4-3faf03dd2eaa - - - - - -4f19f4e1-9084-4ed5-82b4-563c5ebf2251 - -write.write_cube_to_nc -<file_path: CSET_OUTPUT_PATH> - - - 
-5f951fe0-1b14-4816-87a4-3faf03dd2eaa->4f19f4e1-9084-4ed5-82b4-563c5ebf2251 - - +a5185878-23bc-4a6d-b7b8-fe0938e8287f->d044b826-16ab-4caa-8143-ce633218e124 + + diff --git a/docs/source/getting-started/recipe-graph.svg b/docs/source/getting-started/recipe-graph.svg index 88f1a4cf4..cccfd1b68 100644 --- a/docs/source/getting-started/recipe-graph.svg +++ b/docs/source/getting-started/recipe-graph.svg @@ -1,151 +1,120 @@ - - -Graph visualisation of the a CSET recipe. - - - + + + + -fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d - -read.read_cubes +2d30a500-cd2f-423e-9b71-16fda2234267 + +read.read_cubes - + + +fd3e51f5-09f3-432e-843a-64038e1c2ead + +filters.filter_cubes + + + +2d30a500-cd2f-423e-9b71-16fda2234267->fd3e51f5-09f3-432e-843a-64038e1c2ead + + + + -e006203b-b212-45b8-acb2-d2aba3a91c18 - -filters.filter_cubes +7373e45f-e64a-499f-930a-f620387a0c50 + +constraints.combine_constraints - + -fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d->e006203b-b212-45b8-acb2-d2aba3a91c18 - - +2d30a500-cd2f-423e-9b71-16fda2234267->7373e45f-e64a-499f-930a-f620387a0c50 + + - + -84e39446-1511-4779-adc8-c88fdcbf38d2 - -constraints.combine_constraints +91e18a27-b9fe-4a7c-88ec-06e3601567dc + +constraints.generate_var_constraint - + -fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d->84e39446-1511-4779-adc8-c88fdcbf38d2 - - +2d30a500-cd2f-423e-9b71-16fda2234267->91e18a27-b9fe-4a7c-88ec-06e3601567dc + + - + -e27f5b60-93b9-4ac8-bc4e-957547716f51 - -constraints.generate_stash_constraint +d011cf16-6a19-4a74-a21a-2db0b3e7aca9 + +constraints.generate_cell_methods_constraint - + -fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d->e27f5b60-93b9-4ac8-bc4e-957547716f51 - - - - - -888067b4-392d-427b-9f23-172095516377 - -constraints.generate_cell_methods_constraint - - - -fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d->888067b4-392d-427b-9f23-172095516377 - - +2d30a500-cd2f-423e-9b71-16fda2234267->d011cf16-6a19-4a74-a21a-2db0b3e7aca9 + + START - -START + +START - + -START->fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d - - +START->2d30a500-cd2f-423e-9b71-16fda2234267 + + - - -f81b25eb-5d0a-4ece-99bf-65395ba60400 - -constraints.generate_stash_constraint + + +1fd09a47-1fc5-46b2-8aa9-388f0dbd0dd2 + +plot.spatial_pcolormesh_plot - - -START->f81b25eb-5d0a-4ece-99bf-65395ba60400 - - + + +fd3e51f5-09f3-432e-843a-64038e1c2ead->1fd09a47-1fc5-46b2-8aa9-388f0dbd0dd2 + + - + -f81b25eb-5d0a-4ece-99bf-65395ba60400->fc1ca1f4-5d5c-4e34-bbeb-632dd8572e4d - - +7373e45f-e64a-499f-930a-f620387a0c50->fd3e51f5-09f3-432e-843a-64038e1c2ead + + - - -af2a4107-d432-454e-8a71-1cdb7cfdd464 - -collapse.collapse - - + -e006203b-b212-45b8-acb2-d2aba3a91c18->af2a4107-d432-454e-8a71-1cdb7cfdd464 - - +91e18a27-b9fe-4a7c-88ec-06e3601567dc->7373e45f-e64a-499f-930a-f620387a0c50 + + - + -84e39446-1511-4779-adc8-c88fdcbf38d2->e006203b-b212-45b8-acb2-d2aba3a91c18 - - +d011cf16-6a19-4a74-a21a-2db0b3e7aca9->7373e45f-e64a-499f-930a-f620387a0c50 + + + + + +2c736e22-5490-46dc-8d34-3684dbb58ba9 + +write.write_cube_to_nc - + -e27f5b60-93b9-4ac8-bc4e-957547716f51->84e39446-1511-4779-adc8-c88fdcbf38d2 - - - - - -888067b4-392d-427b-9f23-172095516377->84e39446-1511-4779-adc8-c88fdcbf38d2 - - - - - -ff4975a6-888d-4ff2-9254-8d6e12e789c7 - -plot.spatial_contour_plot - - - -af2a4107-d432-454e-8a71-1cdb7cfdd464->ff4975a6-888d-4ff2-9254-8d6e12e789c7 - - - - - -98b17d40-2838-4f13-b5d1-644f90f253a3 - -write.write_cube_to_nc - - - -ff4975a6-888d-4ff2-9254-8d6e12e789c7->98b17d40-2838-4f13-b5d1-644f90f253a3 - - +1fd09a47-1fc5-46b2-8aa9-388f0dbd0dd2->2c736e22-5490-46dc-8d34-3684dbb58ba9 + + diff --git a/docs/source/getting-started/run-recipe.rst 
b/docs/source/getting-started/run-recipe.rst
index fe295543b..848e68993 100644
--- a/docs/source/getting-started/run-recipe.rst
+++ b/docs/source/getting-started/run-recipe.rst
@@ -1,32 +1,24 @@
 Run a pre-existing recipe
 =========================

-.. Tutorial on running a pre-existing recipe, covering cookbook and bake.
+.. Tutorial on running a pre-existing recipe, covering cset bake.

 CSET works by running recipes that describe how to transform and visualise
 data. It comes with a collection of pre-written recipes to get you started. In
 this tutorial you will use CSET to plot the mean surface air temperature of a
 forecast.

-We will create a basic spatial plot of the mean surface air
-temperature. There is a pre-existing recipe for this that can be retrieved with
-the CSET cookbook command.
-
-Try the following:
-
-.. code-block:: bash
-
-    cset cookbook -o recipes mean_surface_air_temperature_spatial_plot.yaml
-
-This will write out a recipes folder containing recipe ``.yaml`` file to your
-current directory. We will use the
-``mean_surface_air_temperature_spatial_plot.yaml`` recipe.
+We will create a basic spatial plot of the mean surface air temperature, using a
+pre-existing recipe. You can `download the recipe file here`_.

 Now you need to find some data to process. You can `download an example file
 here`_, or with the following command.

 .. code-block:: bash

+    # Download recipe file.
+    curl -LO https://gist.githubusercontent.com/jfrost-mo/6e539d5be20bfa28342bf4ff82f24dea/raw/air_temperature_spatial_plot.yaml
+    # Download example data.
     curl -LO https://github.com/MetOffice/CSET/raw/main/tests/test_data/air_temp.nc

 Now we are ready to run our recipe. This is where we use the ``cset bake``
output should be a directory, but it will be created if it does not exist.

 .. code-block:: bash

-    cset bake -i air_temp.nc -o output/ -r recipes/mean_surface_air_temperature_spatial_plot.yaml
+    cset bake -i air_temp.nc -o output/ -r air_temperature_spatial_plot.yaml

 This will run the recipe and leave its output in the specified output directory.

The most interesting output will be the plot, which you can look at with
 ``xdg-open output/plot.png``.

 You've now successfully run CSET with a pre-existing recipe. In the next
 tutorial we will see what is going on inside.

+.. _download the recipe file here: https://gist.githubusercontent.com/jfrost-mo/6e539d5be20bfa28342bf4ff82f24dea/raw/air_temperature_spatial_plot.yaml
 .. _download an example file here: https://github.com/MetOffice/CSET/raw/main/tests/test_data/air_temp.nc
diff --git a/docs/source/getting-started/visualise-recipe.rst b/docs/source/getting-started/visualise-recipe.rst
index b28c5baed..6e5f0d835 100644
--- a/docs/source/getting-started/visualise-recipe.rst
+++ b/docs/source/getting-started/visualise-recipe.rst
@@ -6,15 +6,14 @@ Visualising a recipe graphically
 In this tutorial we will investigate what is going on inside of a recipe, and
 visualise the *operators* inside.

-As in the previous tutorial use the ``cset cookbook`` command to find the "Mean
-Air Temperature Spatial Plot" recipe.
+As in the previous tutorial `download this example recipe file`_.

 We will now visualise the steps inside the recipe using the ``cset graph``
 command.

 .. code-block:: bash

-    cset graph -r recipes/mean-air-temp-spatial-plot.yaml
+    cset graph -r air_temperature_spatial_plot.yaml

 This should open an image of a visualisation of the recipe. Each node is a
 step, or an *operator*, which does a single processing task.
You can see that later @@ -38,20 +37,20 @@ Now we can see the structure of the recipe graphically, we can delve into what each operator is doing. The ellipses represent the operators, and the arrows between them show where they pass their output to the next operators. -The first operator in the recipe is ``read.read_cubes``, however it takes a -constraint on a STASH code, which is itself created by another operator, -``constraints.generate_stash_constraint``. +The first operator in the recipe is ``read.read_cubes``, which loads the data +cubes from a file into a CubeList, which it passes onto the next step. This operators-running-operators behaviour is further used in the next step, where the read CubeList is filtered down to a single air temperature cube. There -are two constraints used here, the STASH code, and the cell methods. These are -combined into a single constraint by the ``constraints.combine_constraints`` -operator before being used by the ``filters.filter_cubes`` operator. +are two constraints used here, the variable's STASH code, and the cell methods. +These are combined into a single constraint by the +``constraints.combine_constraints`` operator before being used by the +``filters.filter_cubes`` operator. -Afterwards the cube has its time dimension removed by the mean method applied by -the ``collapse.collapse`` operator, so it becomes two-dimensional. Then it -passes to the ``plot.spatial_contour_plot`` and ``write.write_cube_to_nc`` -operators to be plotted and saved. +Afterwards the cube passes to the ``plot.spatial_contour_plot`` and +``write.write_cube_to_nc`` operators to be plotted and saved. You now know how to visualise a recipe, and a little about the operators it is -made up of. In the next tutorial you will learn to make your own. +made up of. In the next tutorial you will learn to make your own recipe. + +.. _download this example recipe file: https://gist.githubusercontent.com/jfrost-mo/6e539d5be20bfa28342bf4ff82f24dea/raw/air_temperature_spatial_plot.yaml diff --git a/docs/source/reference/cli.rst b/docs/source/reference/cli.rst index 811842862..92485b68c 100644 --- a/docs/source/reference/cli.rst +++ b/docs/source/reference/cli.rst @@ -57,7 +57,7 @@ or use ``--details`` for descriptions of available recipes. options: -h, --help show this help message and exit - -d, --details list available recipes. Supplied recipes are detailed. + -d, --details list available recipes. Supplied recipes are detailed -o OUTPUT_DIR, --output-dir OUTPUT_DIR directory to save recipes. If omitted uses $PWD diff --git a/docs/source/reference/recipe-format.rst b/docs/source/reference/recipe-format.rst index 3ebe4041d..133b660d7 100644 --- a/docs/source/reference/recipe-format.rst +++ b/docs/source/reference/recipe-format.rst @@ -12,62 +12,60 @@ Below is a commented example recipe: .. code-block:: yaml - category: Category of recipe - title: Name of recipe - description: | - Extended description that can - go across multiple lines. - - parallel: - # Specify the operator to run in each step. - - operator: read.read_cubes - - - operator: filters.filter_cubes - # Can specify extra keyword arguments as sub-maps. - constraint: - # Can nest in another operator to use its output as an argument. - operator: generate_constraints.generate_stash_constraints + # Name of the recipe. + title: Surface air temperature spatial plot + # Category of recipe, used to group together multiple recipes in output. + category: Quick look + # Description will be displayed alongside output. 
+ description: | + Extended description that can go across multiple lines. It is written in + [Markdown](https://commonmark.org/help/) and can thus contain links and + _formatting_. + + # Sequence of steps to run. + steps: + # Specify the operator to run in each step. + - operator: read.read_cubes + # Specify the name of the argument, and its value. + filename_pattern: "*.nc" + + - operator: filters.filter_cubes + # Can specify extra keyword arguments as sub-maps. + constraint: + operator: constraints.combine_constraints + var_constraint: + # Can nest in another operator to use its output as an argument, + # multiple levels deep if needed. + operator: constraints.generate_var_constraint # Input implicitly taken from the previous step, but can be overridden # by using the appropriate keyword argument. - stash: m01s03i236 - - - operator: write.write_cube_to_nc - # Specify the name of the argument, and its value. - filename: intermediate/processed_data - # intermediate is a slightly special folder for partially processed data - # that needs collating. + varname: m01s03i236 + cell_method_constraint: + operator: constraints.generate_cell_methods_constraint + # Values can be more than just strings, such as this empty list. + cell_methods: [] - # Steps to collate processed data into output. - collate: - - operator: read.read_cube - filename: intermediate/*.nc + # Save a sequence of plots, one per time. + - operator: plot.spatial_pcolormesh_plot - # Save a sequence of plots, one per time. - - operator: plot.plot_spatial_plot - - # Save a single cube with all the processed data. - - operator: write.write_cube_to_nc + # Save a single cube with all the processed data. + - operator: write.write_cube_to_nc + overwrite: True The ``title`` and ``description`` keys provide a human readable description of -what the recipe does. The ``title`` is also used to derive the ID of the running -recipe, used when running the recipe in a workflow. The ``category`` is used to -group the produced diagnostics in the output website. - -The ``parallel`` and ``collate`` keys specify lists of processing steps. The -steps are run from top to bottom, with each step specifying an operator to run, -and optionally any additional inputs to that operator. A parallel step is -denoted by a ``-`` under the ``parallel:`` key. The operators are specified on -the operator key. Its value should be a string of the form ``module.function``. -For additional inputs the key should be the name of the argument. - -The ``collate:`` key is used for collating together the output of the -parallel steps to produce the final output. This allows for the expensive -processing to be parallelised over many compute nodes, with just the final -visualisation of the data done in a single job to ensure it has all of the data. +what the recipe does. The ``category`` is used to group the produced diagnostics +in the output website. + +The ``steps`` key lists the processing steps. The steps are run from top to +bottom, with each step specifying an operator to run, and optionally any +additional inputs to that operator. Each separate step is denoted by a ``-`` +under the ``steps:`` key. The operators are specified on the operator key. Its +value should be a string of the form ``module.function``. For additional inputs +the key should be the name of the argument to that operator. The below code block shows how you can nest operators multiple levels deep. 
For
-details of the specific operators involved, and the arguments that can take, see
-the :doc:`/reference/operators` page.
+details of the specific operators involved, and the arguments that they can
+take, see the :doc:`/reference/operators` page.

 .. code-block:: yaml

@@ -98,7 +96,7 @@ case letters and underscores. For example:

 .. code-block:: yaml

-    parameter: $MY_VARIABLE
+    key: $MY_VARIABLE

 When the recipe is run with ``cset bake`` the variable is replaced with a value
 given on the command line. This is done using the variable name as an option,
@@ -114,8 +112,16 @@ Alternatively a space can be used between the variable and value:

     cset bake -i input -o output -r recipe.yaml --MY_VARIABLE value

-The given value will be templated into the parameter so what runs is actually:
+The given value will be templated into the variable so what runs is actually:
+
+.. code-block:: yaml
+
+    key: value
+
+This can also be used to template into existing values, such as the title.

 .. code-block:: yaml

-    parameter: value
+    title: $VARNAME spatial plot
+    # When VARNAME is "air_temperature" becomes:
+    title: air_temperature spatial plot
diff --git a/docs/source/usage/add-diagnostic.rst b/docs/source/usage/add-diagnostic.rst
index 70de3a66a..57b329bcf 100644
--- a/docs/source/usage/add-diagnostic.rst
+++ b/docs/source/usage/add-diagnostic.rst
@@ -175,7 +175,6 @@ Push code into remote repository branch with:

     # Just "git push" will suggest the correct command.
     git push --set-upstream origin <branch_name>

-
 If the remote branch is behind changes on trunk then you update the remote
 branch on the GitHub repository by merging in changes from main. While on your
 branch:

@@ -185,7 +184,6 @@ branch:

     # Merge in the changes from the copy of main on GitHub.
     git merge origin/main

-
 Don't forget to push your changes back up to GitHub.

 .. code-block:: bash

@@ -198,7 +196,7 @@ Start the review process
 Make a :ref:`pull-request` on GitHub to propose your changes for inclusion in
 the trunk.

-Once you have satisfied the steps in the :doc:`Developer's guide
-</contributing/index>` go ahead and request a review on GitHub.
+Once you have satisfied the steps in the :doc:`/contributing/index` go ahead and
+request a review on GitHub.

 .. _recipe file: https://metoffice.github.io/CSET/usage/operator-recipes
diff --git a/docs/source/usage/workflow-installation.rst b/docs/source/usage/workflow-installation.rst
index b3c241a90..3488ab6fc 100644
--- a/docs/source/usage/workflow-installation.rst
+++ b/docs/source/usage/workflow-installation.rst
@@ -47,13 +47,6 @@ the config.

 .. image:: rose-edit.png
    :alt: rose edit GUI, showing the environment setup options.

-* The Data and Cycling section contains settings for the input data, and the
-  workflow cycling controls.
-
-* The Diagnostic section contains settings for enabling the various diagnostics
-  included with CSET. Some general diagnostics will take from a list of variable
-  names or STASH codes, and operate on all of them.
-
 * The Environment section contains settings for controlling how CSET integrates
   with your compute environment. This includes things like activating modules,
   and choosing a site profile.

@@ -61,6 +54,16 @@ the config.

 * The General section contains whole-workflow configuration for things like
   logging and housekeeping.

+* The Models and Cases section contains settings for the input data. Here you
+  need to set up how you want to cycle (over case studies or a trial period),
+  how many models you want, and where to find the data for them. 
+
+* The Diagnostic section contains settings for enabling the various diagnostics
+  included with CSET. The diagnostics are split into broad science areas. Some
+  general diagnostics will have a table in their section for setting which model
+  fields to process. Diagnostics may also take additional options after being
+  enabled.
+
 Help for each variable can be viewed by clicking on the variable's name.

 Once you have configured CSET you can save and close rose edit.

From 7e75674d9f22eed5c72a0f67af98c9e6571c1a16 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 27 Aug 2024 16:07:28 +0100
Subject: [PATCH 50/90] Open HTML output in tutorial

---
 docs/source/getting-started/run-recipe.rst | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/source/getting-started/run-recipe.rst b/docs/source/getting-started/run-recipe.rst
index 848e68993..a9be49d1a 100644
--- a/docs/source/getting-started/run-recipe.rst
+++ b/docs/source/getting-started/run-recipe.rst
@@ -30,9 +30,7 @@ output should be a directory, but it will be created if it does not exist.

     cset bake -i air_temp.nc -o output/ -r air_temperature_spatial_plot.yaml

 This will run the recipe and leave its output in the specified output directory.
-
-The most interesting output will be the plot, which you can look at with
-``xdg-open output/plot.png``.
+You can look at the visualised output with ``xdg-open output/index.html``.

 You've now successfully run CSET with a pre-existing recipe. In the next
 tutorial we will see what is going on inside.

From 40e54ef01de7c1849d55f624a28f59aa3c3d60c2 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 27 Aug 2024 16:30:40 +0100
Subject: [PATCH 51/90] Make additional fields compulsory

---
 cset-workflow/meta/rose-meta.conf | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf
index da769f6a2..022b97e24 100644
--- a/cset-workflow/meta/rose-meta.conf
+++ b/cset-workflow/meta/rose-meta.conf
@@ -137,6 +137,7 @@ help=Remove any modules that are loaded by default before loading the specified
   ones. This is recommended as it makes your workflows more explicit and less
   dependent on site-specific details, increasing portability.
 type=python_boolean
+compulsory=true
 sort-key=modules2

 [template variables=MODULES_LIST]
 ns=Environment
 description=Modules to load.
 type=spaced_list
 length=:
+compulsory=true
 sort-key=modules2

 [template variables=CSET_ENV_USE_CONDA]
@@ -166,6 +168,7 @@ help=Path where the conda executable is. This should be to a directory, rather
   to a binary itself. Leave this field blank if conda comes from another
   source, such as modules or being already on path.
 type=quoted
+compulsory=true
 sort-key=conda2

 [template variables=CONDA_VENV_CREATE]
@@ -175,6 +178,7 @@ description=Whether to (re)create the conda environment.
 help=When enabled it will check that the conda environment exists and is
   up-to-date, recreating it otherwise.
 type=python_boolean
+compulsory=true
 sort-key=conda2

 [template variables=CONDA_VENV_LOCATION]
@@ -188,6 +192,7 @@ help=Existing environments can be found by running `conda info --envs` and
   copying the path displayed there. If unspecified it defaults to the workflow
   share directory.

   If creating the conda environment as part of the workflow, it will only be
   created if it does not exist or is out of date.
 type=quoted
+compulsory=true
 sort-key=conda2

 [template variables=CSET_ENV_USE_LOCAL_CSET]
@@ -209,6 +214,7 @@ help=Path to either a wheel file, or a checked out copy of the CSET git repo. 
For the repository, it should be the path to the directory containing the pyproject.toml file. type=quoted +compulsory=true sort-key=localcset2 [template variables=CSET_ENV_SEPARATE_MET] @@ -226,24 +232,28 @@ sort-key=met1 ns=Environment description=Location of the conda virtual environment needed by METplus. type=quoted +compulsory=true sort-key=met2 [template variables=MET_INSTALL_DIR] ns=Environment description=Path where MET is installed to. type=quoted +compulsory=true sort-key=met2 [template variables=METPLUS_BASE] ns=Environment description=Path where METplus is installed to. type=quoted +compulsory=true sort-key=met2 [template variables=MET_LIBRARIES] ns=Environment description=Path to libraries needed by MET. type=quoted +compulsory=true sort-key=met2 From 1f3a941201caf7d330d6123fee7166748f7d20d7 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 27 Aug 2024 16:31:18 +0100 Subject: [PATCH 52/90] Update example rose-suite.conf --- cset-workflow/rose-suite.conf.example | 178 ++++++++++++++++---------- 1 file changed, 107 insertions(+), 71 deletions(-) diff --git a/cset-workflow/rose-suite.conf.example b/cset-workflow/rose-suite.conf.example index 45581efca..32717b2e0 100644 --- a/cset-workflow/rose-suite.conf.example +++ b/cset-workflow/rose-suite.conf.example @@ -1,101 +1,137 @@ [template variables] -BASIC_QQ_PLOTS=False -CLEAN_WEB_DIR=True +BASIC_QQ_PLOT=False +COLORBAR_FILE="" !!CONDA_METPLUS_VENV_LOCATION="" CONDA_PATH="" CONDA_VENV_CREATE=True -CONDA_VENV_LOCATION="$SCRATCH/cset-workflow-conda-env" -!!COORDINATE_LIST=[] -CSET_CYCLE_PERIOD="" +CONDA_VENV_LOCATION="" +!!COORDINATE_LIST="" +CSET_ANALYSIS_OFFSET="" +CSET_ANALYSIS_PERIOD="" +CSET_CASE_DATES=[] +CSET_CYCLING_MODE="case_study" CSET_ENV_SEPARATE_MET=False CSET_ENV_USE_CONDA=True -CSET_ENV_USE_LOCAL_CSET=True +CSET_ENV_USE_LOCAL_CSET=False CSET_ENV_USE_MODULES=False -!!CSET_FILE_NAME_METADATA_PATTERN="" -!!CSET_FILE_TIME_OFFSET=0 -CSET_FINAL_CYCLE_POINT="" -CSET_INCREMENTAL_DATA_FETCH=False -CSET_INCREMENTAL_OUTPUT=False -!!CSET_INCREMENTAL_OUTPUT_PERIOD="" -CSET_INITIAL_CYCLE_POINT="" -CSET_INPUT_FILE_PATH="" -CSET_LOCAL_CSET_PATH="$HOME/CSET" -CSET_RUNAHEAD_LIMIT=10 -!!CSET_TIMES_PER_FILE=0 -CS_FINISHCOORDS="" -CS_STARTCOORDS="" -CS_VARS="" -CS_VERTLEV="" -DB_LONG_JOB=False -DETERMINISTIC_PLOT_AGGREGATE_PRECIPITATION=False +!!CSET_LOCAL_CSET_PATH="" +CSET_MODEL_COUNT=1 +CSET_RUNAHEAD_LIMIT=5 +!!CSET_TRIAL_CYCLE_PERIOD="" +!!CSET_TRIAL_END_DATE="" +!!CSET_TRIAL_START_DATE="" DETERMINISTIC_PLOT_CAPE_RATIO=False DETERMINISTIC_PLOT_INFLOW_PROPERTIES=False -DETERMINISTIC_PLOT_MODEL_LEVEL_AIR_TEMP=False -DETERMINISTIC_PLOT_SURFACE_AIR_TEMP=False DOMAIN_HISTOGRAM_SERIES=False -HISTOGRAM_TYPE="step" DOMAIN_MEAN_SURFACE_TIME_SERIES=False -DOMAIN_MEAN_TIME_SERIES_STASH=False DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES=False DOMAIN_MEAN_VERTICAL_PROFILE_SERIES=False -DOMAIN_SURFACE_HISTOGRAM_SERIES=False -ENSEMBLE_PLOT_SURFACE_AIR_TEMP=False -EXTRACT_TRANSECT=False -FETCH_FCST_OPT_CONF="filesystem" +DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD=False +EXTRACT_MLEVEL_TRANSECT=False +EXTRACT_PLEVEL_TRANSECT=False +!!HISTOGRAM_TYPE="step" HOUSEKEEPING_MODE=2 +!!LATITUDE_POINT=0 !!LEVELS_A=[] !!LEVELS_B=[] -LFRIC_BASIC_QQ_PLOT=False -!!LFRIC_MODEL_FIELDS_A=[] -!!LFRIC_VERTICAL_CORDINATE_A=[] -!!LFRIC_LEVELS_A=[] -!!LFRIC_MODEL_FIELDS_B=[] -!!LFRIC_VERTICAL_CORDINATE_B=[] -!!LFRIC_LEVELS_B=[] -!!LFRIC_COORDINATE_LIST=[] -!!LFRIC_ONE_TO_ONE=False -LFRIC_DOMAIN_MEAN_SURFACE_TIME_SERIES=False -LFRIC_DOMAIN_HISTOGRAM_SERIES=False 
-LFRIC_DOMAIN_MEAN_VERTICAL_PROFILE_SERIES=False -LFRIC_MODEL_LEVELS=[] -LFRIC_MODEL_LEVEL_MODEL_FIELDS=[] -LFRIC_PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD=False -LFRIC_PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD=False -LFRIC_PLOT_SPATIAL_SURFACE_MODEL_FIELD=False -LFRIC_DOMAIN_MEAN_VERTICAL_PROFILE_SERIES=False LOGLEVEL="INFO" -MEAN_HOURLY_ACCUMULATED_PRECIPITATION_ENSEMBLE_POSTAGE_STAMP_PLOT=False -METPLUS_ANA_DIR="" +!!LONGITUDE_POINT=0 !!METPLUS_BASE="" -!!METPLUS_OBS_DIR="" -!!METPLUS_OPT_CONFIG_KEYS="" METPLUS_GRID_STAT=False -METPLUS_OBS_DIR="" -METPLUS_OPT_CONFIG_KEYS="" METPLUS_POINT_STAT=False !!MET_INSTALL_DIR="" !!MET_LIBRARIES="" -MODEL_LEVEL_MODEL_FIELDS=[] -!!MODULES_LIST= -!!MODULES_PURGE=True -PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD=False +!!MLEVEL_TRANSECT_FINISHCOORDS= +!!MLEVEL_TRANSECT_STARTCOORDS= !!MODEL_FIELDS_A=[] !!MODEL_FIELDS_B=[] +MODEL_LEVEL_MODEL_FIELDS= !!MODULES_LIST= !!MODULES_PURGE=True -!!ONE_TO_ONE=True +!!ONE_TO_ONE=False +!!PLEVEL_TRANSECT_FINISHCOORDS= +!!PLEVEL_TRANSECT_STARTCOORDS= +PLOT_RESOLUTION=100 +PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD=False PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD=False -PLOT_SPATIAL_STASH_FIELD=False PLOT_SPATIAL_SURFACE_MODEL_FIELD=False -!!PRESSURE_LEVELS= -!!PRESSURE_LEVEL_MODEL_FIELDS= -STASH_CODES=[] -!!SUBAREA_LAT_BOUND_BOTTOM=0 -!!SUBAREA_LAT_BOUND_TOP=0 -!!SUBAREA_LON_BOUND_LEFT=0 -!!SUBAREA_LON_BOUND_RIGHT=0 -SURFACE_MODEL_FIELDS=[] -!!VERTICAL_CORDINATE_A=[] -!!VERTICAL_CORDINATE_B=[] +PRESSURE_LEVELS=[] +PRESSURE_LEVEL_MODEL_FIELDS= +!!SINGLE_POINT_METHOD="Nearest" +SURFACE_MODEL_FIELDS= +SURFACE_SINGLE_POINT_TIME_SERIES=False +UM_MODEL_LEVELS=[] +!!VERTICAL_COORDINATE_A=[] +!!VERTICAL_COORDINATE_B=[] WEB_ADDR="" WEB_DIR="$HOME/public_html/CSET" +m01_data_path="" +!!m01_data_period="" +m01_data_source="filesystem" +m01_date_type="initiation" +m01_name="" +m01_preprocessing=False +!!m01_preprocessing_recipe="" +!!m02_data_path="" +!!m02_data_period="" +!!m02_data_source="filesystem" +!!m02_date_type="initiation" +!!m02_name="" +!!m02_preprocessing=False +!!m02_preprocessing_recipe="" +!!m03_data_path="" +!!m03_data_period="" +!!m03_data_source="filesystem" +!!m03_date_type="initiation" +!!m03_name="" +!!m03_preprocessing=False +!!m03_preprocessing_recipe="" +!!m04_data_path="" +!!m04_data_period="" +!!m04_data_source="filesystem" +!!m04_date_type="initiation" +!!m04_name="" +!!m04_preprocessing=False +!!m04_preprocessing_recipe="" +!!m05_data_path="" +!!m05_data_period="" +!!m05_data_source="filesystem" +!!m05_date_type="initiation" +!!m05_name="" +!!m05_preprocessing=False +!!m05_preprocessing_recipe="" +!!m06_data_path="" +!!m06_data_period="" +!!m06_data_source="filesystem" +!!m06_date_type="initiation" +!!m06_name="" +!!m06_preprocessing=False +!!m06_preprocessing_recipe="" +!!m07_data_path="" +!!m07_data_period="" +!!m07_data_source="filesystem" +!!m07_date_type="initiation" +!!m07_name="" +!!m07_preprocessing=False +!!m07_preprocessing_recipe="" +!!m08_data_path="" +!!m08_data_period="" +!!m08_data_source="filesystem" +!!m08_date_type="initiation" +!!m08_name="" +!!m08_preprocessing=False +!!m08_preprocessing_recipe="" +!!m09_data_path="" +!!m09_data_period="" +!!m09_data_source="filesystem" +!!m09_date_type="initiation" +!!m09_name="" +!!m09_preprocessing=False +!!m09_preprocessing_recipe="" +!!m10_data_path="" +!!m10_data_period="" +!!m10_data_source="filesystem" +!!m10_date_type="initiation" +!!m10_name="" +!!m10_preprocessing=False +!!m10_preprocessing_recipe="" From 322066bbe6fc0a5c883199275de62d7499a3018f Mon Sep 17 
00:00:00 2001 From: Sylvia Bohnenstengel <62748926+Sylviabohnenstengel@users.noreply.github.com> Date: Fri, 30 Aug 2024 09:44:34 +0100 Subject: [PATCH 53/90] Update rose-meta.conf --- cset-workflow/meta/diagnostics/rose-meta.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 06b1ec48a..d0d9894b0 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -8,7 +8,7 @@ [template variables=SURFACE_MODEL_FIELDS] ns=Diagnostics/Quicklook title=Surface model fields -description=Per model field names. +description=Field names per model. help=Variable names for surface variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, or field name, or the STASH code for each model. Blank entries indicate that a @@ -82,7 +82,7 @@ sort-key=0surface6 [template variables=PRESSURE_LEVEL_MODEL_FIELDS] ns=Diagnostics/Quicklook title=Pressure level model fields -description=Per model field names. +description=Field names for each model. help=Variable names for pressure level variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, or field name, or the STASH code for each model. Blank entries indicate that @@ -175,7 +175,7 @@ sort-key=1pressure6 [template variables=MODEL_LEVEL_MODEL_FIELDS] ns=Diagnostics/Quicklook title=Model level model fields -description=Per model field names. +description=Field names for each model. help=Variable names for model level variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, or field name, or the STASH code for each model. Blank entries indicate that From 2276c9faa5319f5ef02ff433812c6b7cb0411ae3 Mon Sep 17 00:00:00 2001 From: Sylvia Bohnenstengel <62748926+Sylviabohnenstengel@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:28:25 +0100 Subject: [PATCH 54/90] Update documentation.rst --- docs/source/contributing/documentation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/contributing/documentation.rst b/docs/source/contributing/documentation.rst index 75ac1a2ac..aa91e9035 100644 --- a/docs/source/contributing/documentation.rst +++ b/docs/source/contributing/documentation.rst @@ -10,7 +10,7 @@ maintain documentation within the version control system, and keep it up to date. The `Sphinx website`_ has a useful primer to using reStructuredText for documentation. -The documentation is organised intro sections following the `Diátaxis +The documentation is organised into sections following the `Diátaxis documentation system`_. You can build the documentation of CSET with the following command: From 018126be188d6240db53e73023baa23d1e413f92 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 17:48:52 +0100 Subject: [PATCH 55/90] Change datetime examples to use YYYYMMDDThhmmZ format It is more familiar to users. --- cset-workflow/meta/rose-meta.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 022b97e24..06a1b2c12 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -282,7 +282,7 @@ ns=Models and Cases title=Case study dates description=List of datetimes of cases. 
help=This should be a python list of ISO 8601 datetime strings indicating the
-  forecast initiation time (AKA data time) of the data. E.g. 2000-01-01T00:00Z
+  forecast initiation time (AKA data time) of the data. E.g. 20000101T0000Z
   Ensure that it is consistent with your data’s first validity time. If not
   then a warning that cubes can not be loaded is raised.
 type=python_list
 compulsory=true
 sort-key=b1

@@ -295,7 +295,7 @@ ns=Models and Cases
 title=Trial start date
 description=Start date of the trial.
 help=The start date of the trial, in ISO 8601 format. This is the first date
-  that the workflow will run from. For example: 2000-01-01T00:00Z
+  that the workflow will run from. For example: 20000101T0000Z
 type=quoted
 compulsory=true
 sort-key=c1

@@ -306,7 +306,7 @@ title=Trial end date
 description=End date of the trial. If blank, the workflow will run indefinitely.
 help=The end date of the trial, in ISO 8601 format. This is the last date that
   the workflow will run to. If blank, the workflow will run indefinitely.
-  For example: 2000-01-01T00:00Z
+  For example: 20000101T0000Z
 type=quoted
 compulsory=true
 sort-key=c2

From 027983834e94160b00cf3e64cf3ab1ae511cb387 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 3 Sep 2024 17:55:31 +0100
Subject: [PATCH 56/90] Clarify help for CSET_CASE_DATES

---
 cset-workflow/meta/rose-meta.conf | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf
index 06a1b2c12..57cdfe575 100644
--- a/cset-workflow/meta/rose-meta.conf
+++ b/cset-workflow/meta/rose-meta.conf
@@ -282,9 +282,8 @@ ns=Models and Cases
 title=Case study dates
 description=List of datetimes of cases.
 help=This should be a python list of ISO 8601 datetime strings indicating the
-  forecast initiation time (AKA data time) of the data. E.g. 20000101T0000Z
-  Ensure that it is consistent with your data’s first validity time. If not
-  then a warning that cubes can not be loaded is raised.
+  forecast initiation time or first validity time (AKA data time) of the
+  data. E.g. 20000101T0000Z
 type=python_list
 compulsory=true
 sort-key=b1

From 5442b926e87230dcd14cf5ed9f05f4f43519c13f Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 3 Sep 2024 17:58:17 +0100
Subject: [PATCH 57/90] Clarify CSET_ANALYSIS_OFFSET

Note that it fixes differences between the initiation time and the
first validity time.

---
 cset-workflow/meta/rose-meta.conf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf
index 57cdfe575..aaacee217 100644
--- a/cset-workflow/meta/rose-meta.conf
+++ b/cset-workflow/meta/rose-meta.conf
@@ -337,8 +337,8 @@ ns=Models and Cases
 title=Analysis offset
 description=Offset from forecast initiation to verification start.
 help=The offset in time between the forecast initiation and the start of the
-  analysis period. This is useful when needed fields are not output on the
-  first time step. For example: PT1H
+  analysis period, AKA the first validity time. This is useful when needed
+  fields are not output on the first time step. 
For example: PT1H type=quoted compulsory=true sort-key=d2 From 29d5204065b373f6104fb21fa26efceff9a76c1f Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:04:16 +0100 Subject: [PATCH 58/90] Improve definitions of lead time and validity time --- cset-workflow/meta/rose-meta.conf | 69 ++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index aaacee217..be05b662c 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -497,13 +497,16 @@ trigger=template variables=m01_name: this >= 1; # help=The type of date templated into the data path. Affects interpretation of # strftime placeholders in the data path. -# Validity time is when the data is predicting for. - # Forecast initiation time, AKA data time, is the time of the validity time of # the first data point. For realtime forecasts this is approximately when the # forecast was started. -# Forecast lead time is how far from the forecast initiation time the data is. +# Validity time is when the simulated or predicted data are valid for. + +# Forecast lead time is how far the forecasted validity times have advanced +# from the initiation time of the forecast i.e. it is the difference between +# the validity time and the forecast initiation time. + # values="initiation", "validity", "lead" # value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time # compulsory=true @@ -605,9 +608,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -709,9 +714,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -813,9 +820,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. 
it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -917,9 +926,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -1021,9 +1032,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -1125,9 +1138,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -1229,9 +1244,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -1333,9 +1350,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. 
- Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -1437,9 +1456,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true @@ -1541,9 +1562,11 @@ help=The type of date templated into the data path. Affects interpretation of the first data point. For realtime forecasts this is approximately when the forecast was started. - Validity time is when the data is predicting for. + Validity time is when the simulated or predicted data are valid for. - Forecast lead time is how far from the forecast initiation time the data is. + Forecast lead time is how far the forecasted validity times have advanced + from the initiation time of the forecast i.e. it is the difference between + the validity time and the forecast initiation time. values="initiation", "validity", "lead" value-titles=Forecast Initiation Time, Validity Time, Forecast Lead Time compulsory=true From 026baa3b96b0e102c2a08a203fff2715e1d789fc Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:08:25 +0100 Subject: [PATCH 59/90] Number models from 0 to allow directly using as index This removes the -1 from all usage in include files. 
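
For illustration, a minimal Python sketch of why zero-based numbering
removes the offset (the two-entry models list below is invented for the
example and is not part of the patch):

    # With zero-based model numbers the loop index and the list index
    # line up, so looking up a model needs no -1 correction.
    models = [{"name": "Model A"}, {"name": "Model B"}]
    for model_number, model in enumerate(models):
        # Previously model_number started at 1, so templates had to use
        # models[model_number - 1] to reach the right entry.
        assert models[model_number] is model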
--- cset-workflow/includes/basic_qq_plot.cylc | 2 +- .../includes/deterministic_domain_histogram_series.cylc | 2 +- .../includes/deterministic_domain_mean_surface_time_series.cylc | 2 +- ...inistic_domain_mean_vertical_model_level_profile_series.cylc | 2 +- ...erministic_domain_mean_vertical_pressure_profile_series.cylc | 2 +- .../includes/deterministic_domain_surface_histogram_series.cylc | 2 +- .../deterministic_single_point_surface_time_series.cylc | 2 +- cset-workflow/includes/mlevel_transect.cylc | 2 +- cset-workflow/includes/plevel_transect.cylc | 2 +- cset-workflow/includes/plot_spatial_mlevel_model_field.cylc | 2 +- cset-workflow/includes/plot_spatial_plevel_model_field.cylc | 2 +- cset-workflow/includes/plot_spatial_surface_model_field.cylc | 2 +- cset-workflow/lib/python/jinja_utils.py | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cset-workflow/includes/basic_qq_plot.cylc b/cset-workflow/includes/basic_qq_plot.cylc index 15d9d4f21..340349aa6 100644 --- a/cset-workflow/includes/basic_qq_plot.cylc +++ b/cset-workflow/includes/basic_qq_plot.cylc @@ -42,7 +42,7 @@ CSET_RECIPE_NAME = "generic_basic_qq_plot.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/deterministic_domain_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_histogram_series.cylc index 029159ad0..6529bb00a 100644 --- a/cset-workflow/includes/deterministic_domain_histogram_series.cylc +++ b/cset-workflow/includes/deterministic_domain_histogram_series.cylc @@ -10,7 +10,7 @@ CSET_ADDOPTS = """ --VARNAME='{{field}}' --PLEVEL='{{plevel}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc b/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc index 69b9cdac3..b4ce78e67 100644 --- a/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc @@ -8,7 +8,7 @@ CSET_RECIPE_NAME = "generic_surface_domain_mean_time_series.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc index 9489f97c6..82aa47d4b 100644 --- a/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc @@ -8,7 +8,7 @@ CSET_RECIPE_NAME = "generic_mlevel_domain_mean_vertical_profile_series.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc index 039425364..615614086 100644 --- 
a/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc @@ -8,7 +8,7 @@ CSET_RECIPE_NAME = "generic_plevel_domain_mean_vertical_profile_series.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc index e031b892d..9d1a638e2 100644 --- a/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc +++ b/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc @@ -8,7 +8,7 @@ CSET_RECIPE_NAME = "generic_surface_histogram_series.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc b/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc index 7f6ced215..87d53ef6b 100644 --- a/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc +++ b/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc @@ -8,7 +8,7 @@ CSET_RECIPE_NAME = "generic_surface_single_point_time_series.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' --LONGITUDE_POINT='{{LONGITUDE_POINT}}' --LATITUDE_POINT='{{LATITUDE_POINT}}' --SINGLE_POINT_METHOD='{{SINGLE_POINT_METHOD}}' diff --git a/cset-workflow/includes/mlevel_transect.cylc b/cset-workflow/includes/mlevel_transect.cylc index 710074830..bfc1b0839 100644 --- a/cset-workflow/includes/mlevel_transect.cylc +++ b/cset-workflow/includes/mlevel_transect.cylc @@ -11,7 +11,7 @@ CSET_ADDOPTS = """ --VARNAME='{{field}}' --VERTICAL_COORDINATE='model_level_number' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' --START_COORDS='{{MLEVEL_TRANSECT_STARTCOORDS}}' --FINISH_COORDS='{{MLEVEL_TRANSECT_FINISHCOORDS}}' """ diff --git a/cset-workflow/includes/plevel_transect.cylc b/cset-workflow/includes/plevel_transect.cylc index 18b99f71f..4e4dd78a0 100644 --- a/cset-workflow/includes/plevel_transect.cylc +++ b/cset-workflow/includes/plevel_transect.cylc @@ -11,7 +11,7 @@ CSET_ADDOPTS = """ --VARNAME='{{field}}' --VERTICAL_COORDINATE='pressure' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' --START_COORDS='{{PLEVEL_TRANSECT_STARTCOORDS}}' --FINISH_COORDS='{{PLEVEL_TRANSECT_FINISHCOORDS}}' """ diff --git a/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc b/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc index a4c5cb2ab..9f1a48bde 100644 --- a/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc @@ -10,7 +10,7 @@ CSET_ADDOPTS = """ --VARNAME='{{field}}' --MLEVEL='{{mlevel}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc 
index cc98bf99b..1f370d3ba 100644 --- a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc @@ -10,7 +10,7 @@ CSET_ADDOPTS = """ --VARNAME='{{field}}' --PLEVEL='{{plevel}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/includes/plot_spatial_surface_model_field.cylc b/cset-workflow/includes/plot_spatial_surface_model_field.cylc index 8da61262c..8c4f481a2 100644 --- a/cset-workflow/includes/plot_spatial_surface_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_surface_model_field.cylc @@ -8,7 +8,7 @@ CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml" CSET_ADDOPTS = """ --VARNAME='{{field}}' - --MODEL_NAME='{{models[model_number-1]["name"]}}' + --MODEL_NAME='{{models[model_number]["name"]}}' """ MODEL_NUMBER = {{model_number}} {% endfor %} diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py index 5c840a732..65801b734 100644 --- a/cset-workflow/lib/python/jinja_utils.py +++ b/cset-workflow/lib/python/jinja_utils.py @@ -38,7 +38,7 @@ def get_models(rose_variables: dict): configuration. """ models = [] - for model in range(1, 11): + for model in range(0, 9): model_prefix = f"m{model:02d}_" model_vars = { key.removeprefix(model_prefix): value From 30f83a09db37967194c2752d90d181203ccc34e2 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:16:43 +0100 Subject: [PATCH 60/90] Add assert message to disambiguate asserts --- tests/workflow_utils/test_fetch_data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index 0a6434019..dc247800a 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -52,13 +52,15 @@ def test_get_needed_environment_variables(monkeypatch): "share_dir": path, } actual = fetch_data._get_needed_environment_variables() - assert actual == expected + assert actual == expected, "Unexpected values from reading environment variables" # Check DATA_PERIOD is not there for initiation. 
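# (When DATE_TYPE is "initiation", _get_needed_environment_variables sets
# data_period to None instead of reading it from the DATA_PERIOD variable.)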
monkeypatch.setenv("DATE_TYPE", "initiation") monkeypatch.delenv("DATA_PERIOD") initiation_actual = fetch_data._get_needed_environment_variables() - assert initiation_actual["data_period"] is None + assert ( + initiation_actual["data_period"] is None + ), "data_period should not be set for initiation time" def test_fetch_data(monkeypatch, tmp_path): From e72e40f6844f62f4c1691ddc7a835fc49c01023f Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:19:42 +0100 Subject: [PATCH 61/90] Rename FileRetriever to show it's an ABC --- cset-workflow/app/fetch_fcst/bin/fetch-data-http.py | 4 ++-- src/CSET/_workflow_utils/fetch_data.py | 8 ++++---- tests/workflow_utils/test_fetch_data.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py index 4b67d0451..154504e01 100644 --- a/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py +++ b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py @@ -6,10 +6,10 @@ import urllib.parse import urllib.request -from CSET._workflow_utils.fetch_data import FileRetriever, fetch_data +from CSET._workflow_utils.fetch_data import FileRetrieverABC, fetch_data -class HTTPFileRetriever(FileRetriever): +class HTTPFileRetriever(FileRetrieverABC): """Retrieve files via HTTP.""" def get_file(self, file_path: str, output_dir: str) -> None: diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index 27335fe34..8b6da4371 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -19,7 +19,7 @@ ) -class FileRetriever(abc.ABC): +class FileRetrieverABC(abc.ABC): """Abstract class for retrieving files from a data source. The `get_file` method must be defined. Optionally the __enter__ and __exit__ @@ -31,7 +31,7 @@ class FileRetriever(abc.ABC): method is called for each file path. """ - def __enter__(self) -> "FileRetriever": + def __enter__(self) -> "FileRetrieverABC": """Initialise the file retriever.""" logging.debug("Initialising FileRetriever.") return self @@ -61,7 +61,7 @@ def get_file(self, file_path: str, output_dir: str) -> None: # pragma: no cover raise NotImplementedError -class FilesystemFileRetriever(FileRetriever): +class FilesystemFileRetriever(FileRetrieverABC): """Retrieve files from the filesystem.""" def get_file(self, file_path: str, output_dir: str) -> None: @@ -158,7 +158,7 @@ def _template_file_path( return paths -def fetch_data(file_retriever: FileRetriever = FilesystemFileRetriever): +def fetch_data(file_retriever: FileRetrieverABC = FilesystemFileRetriever): """Fetch the data for a model. 
The following environment variables need to be set: diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index dc247800a..6fbe55779 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -84,7 +84,7 @@ def mock_template_file_path(*args, **kwargs): files_gotten = False - class MockFileRetriever(fetch_data.FileRetriever): + class MockFileRetriever(fetch_data.FileRetrieverABC): def get_file(self, file_path: str, output_dir: str) -> None: nonlocal files_gotten files_gotten = True From cade4fc2f8cedfe3b128a20a2777d1a203a7c7a2 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:24:15 +0100 Subject: [PATCH 62/90] Clarify point being relative to the data CRS --- cset-workflow/meta/diagnostics/rose-meta.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index d0d9894b0..525eacc2a 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -55,7 +55,7 @@ sort-key=0surface5 [template variables=LATITUDE_POINT] ns=Diagnostics/Quicklook -description=Latitude of selected point. Note that this could be rotated or not, depending on the data provided. +description=Latitude of selected point in the same coordinate system as the data. help=The latitude must exist within the domain. Value should be a float: for example, -1.5. type=real compulsory=true sort-key=0surface6 [template variables=LONGITUDE_POINT] ns=Diagnostics/Quicklook -description=Longitude of selected point. Note that this could be rotated or not, depending on the data provided. +description=Longitude of selected point in the same coordinate system as the data. help=The longitude must exist within the domain. Value should be a float: for example, 0.8. type=real compulsory=true From c24ecc65df381c5e1621c5a1616b04fc8bfac4d1 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:29:19 +0100 Subject: [PATCH 63/90] Clarify STASH code format --- cset-workflow/meta/diagnostics/rose-meta.conf | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 525eacc2a..696a6757c 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -11,8 +11,9 @@ title=Surface model fields description=Field names per model. help=Variable names for surface variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, or - field name, or the STASH code for each model. Blank entries indicate that a - model does not have that phenomenon, so it will be skipped. + field name for each model. Where applicable, a STASH code in the format + "m??s??i???" may be used instead. Blank entries indicate that a model + does not have that phenomenon, so it will be skipped. Ignore the boxes for models that are not enabled. compulsory=true @@ -85,8 +86,9 @@ title=Pressure level model fields description=Field names for each model. help=Variable names for pressure level variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, - or field name, or the STASH code for each model. Blank entries indicate that - a model does not have that phenomenon, so it will be skipped. + or field name for each model.
Where applicable, a STASH code in the format + "m??s??i???" may be used instead. Blank entries indicate that a model + does not have that phenomenon, so it will be skipped. Ignore the boxes for models that are not enabled. compulsory=true @@ -178,8 +180,9 @@ title=Model level model fields description=Field names for each model. help=Variable names for model level variables. The names across a row should match the same physical phenomenon, and use the appropriate standard, long, - or field name, or the STASH code for each model. Blank entries indicate that - a model does not have that phenomenon, so it will be skipped. + or field name for each model. Where applicable, a STASH code in the format + "m??s??i???" may be used instead. Blank entries indicate that a model + does not have that phenomenon, so it will be skipped. Ignore the boxes for models that are not enabled. compulsory=true From 330eb5974b5fbc648f9f9fa776b46004821759a3 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:30:42 +0100 Subject: [PATCH 64/90] Remove useless help text --- cset-workflow/meta/rose-meta.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index be05b662c..4a6102650 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -37,7 +37,6 @@ ns=General description=Filepath and name for colorbar details of each variable i.e. name_of_filepath/name_of_filename. An example file is available under CSET/cset-workflow/extra-meta/colorbar_dict_alphabetical.json -help=TODO type=quoted compulsory=true From 5c68f5e5fb8a986e66c3f862adf8879403eba35b Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 3 Sep 2024 18:37:20 +0100 Subject: [PATCH 65/90] Clarify help text around rose edit quoting --- cset-workflow/meta/rose-meta.conf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 4a6102650..979cf8657 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -467,7 +467,7 @@ trigger=template variables=m01_name: this >= 1; # description=The path to the forecast. # help=Full path (including file name) to the forecast data on your chosen storage # system. Can contain wildcards. No quotation marks required in rose edit, as -# it is already quoted there. +# it is automatically quoted there. # # strftime format strings are supported, and will be replaced with the # desired case study date or trial time.
help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -892,7 +892,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -998,7 +998,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -1104,7 +1104,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -1210,7 +1210,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -1316,7 +1316,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -1422,7 +1422,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. E.g: `/data/%Y%m%d/model1/*.nc` @@ -1528,7 +1528,7 @@ title=Data path description=The path to the forecast. help=Full path (including file name) to the forecast data on your chosen storage system. Can contain wildcards. No quotation marks required in rose edit, as - it is already quoted there. + it is automatically quoted there. strftime format strings are supported, and will be replaced with the desired case study date or trial time. 
E.g: `/data/%Y%m%d/model1/*.nc` From 7a715d677e9882a6b3f51a3bf9a1a32647b6b54c Mon Sep 17 00:00:00 2001 From: James Frost Date: Wed, 11 Sep 2024 01:19:40 +0100 Subject: [PATCH 66/90] Fix off-by-one error in model numbering --- cset-workflow/lib/python/jinja_utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py index 65801b734..ac9631a1c 100644 --- a/cset-workflow/lib/python/jinja_utils.py +++ b/cset-workflow/lib/python/jinja_utils.py @@ -76,11 +76,7 @@ def restructure_field_list(fields: list): batched = getattr(itertools, "batched", _batched) all_fields = batched(fields, max_number_of_models) rearranged = [ - { - field[0] + 1: field[1] - for field in enumerate(equivalent_model_fields) - if field[1] - } + {field[0]: field[1] for field in enumerate(equivalent_model_fields) if field[1]} for equivalent_model_fields in all_fields ] return rearranged From 48ba32544d0b85d184ed723ebf4d6516afecdeef Mon Sep 17 00:00:00 2001 From: James Frost Date: Wed, 11 Sep 2024 01:44:08 +0100 Subject: [PATCH 67/90] Do full range of models --- cset-workflow/lib/python/jinja_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py index ac9631a1c..581e2fb28 100644 --- a/cset-workflow/lib/python/jinja_utils.py +++ b/cset-workflow/lib/python/jinja_utils.py @@ -38,7 +38,7 @@ def get_models(rose_variables: dict): """ models = [] - for model in range(0, 9): + for model in range(0, 10): model_prefix = f"m{model:02d}_" model_vars = { key.removeprefix(model_prefix): value From c30c3cc3ef7a986219bc588de65b4b80fc2f59d5 Mon Sep 17 00:00:00 2001 From: James Frost Date: Wed, 11 Sep 2024 02:00:05 +0100 Subject: [PATCH 68/90] Fix off-by-one between rose-suite.conf and list of models --- cset-workflow/lib/python/jinja_utils.py | 2 +- src/CSET/_workflow_utils/fetch_data.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cset-workflow/lib/python/jinja_utils.py b/cset-workflow/lib/python/jinja_utils.py index 581e2fb28..7246d88cf 100644 --- a/cset-workflow/lib/python/jinja_utils.py +++ b/cset-workflow/lib/python/jinja_utils.py @@ -39,7 +39,7 @@ def get_models(rose_variables: dict): """ models = [] for model in range(0, 10): - model_prefix = f"m{model:02d}_" + model_prefix = f"m{model+1:02d}_" model_vars = { key.removeprefix(model_prefix): value for key, value in rose_variables.items() diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index 8b6da4371..8392cb835 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -111,6 +111,7 @@ def _get_needed_environment_variables() -> dict: if variables["date_type"] != "initiation": raise variables["data_period"] = None + logging.debug("Environment variables loaded: %s", variables) return variables From 39c96504563fffab58cf7886f8d63330ad5d2fb0 Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 05:19:42 +0100 Subject: [PATCH 69/90] Clarify coordinates that are in the model's CRS --- cset-workflow/meta/diagnostics/rose-meta.conf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 696a6757c..480e6a0b5 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -155,7 +155,7 @@
sort-key=1pressure5 [template variables=PLEVEL_TRANSECT_STARTCOORDS] ns=Diagnostics/Quicklook description=Start latitude, longitude of the cross section. -help=The latitude, longitude coordinate with respect to the model grid where the +help=The latitude, longitude coordinate in the model coordinate system where the cross section will start (i.e. the furthest left hand point of the plot, where the x axis is distance along transect, and the y axis is pressure level). type=real,real @@ -165,7 +165,7 @@ sort-key=1pressure6 [template variables=PLEVEL_TRANSECT_FINISHCOORDS] ns=Diagnostics/Quicklook description=Finish latitude, longitude of the cross section. -help=The latitude, longitude coordinate with respect to the model grid where the +help=The latitude, longitude coordinate in the model coordinate system where the cross section will finish (i.e. the furthest right hand point of the plot, where the x axis is distance along transect, and the y axis is pressure level). type=real,real @@ -233,7 +233,7 @@ sort-key=2modellevel5 [template variables=MLEVEL_TRANSECT_STARTCOORDS] ns=Diagnostics/Quicklook description=Start latitude, longitude of the cross section. -help=The latitude, longitude coordinate with respect to the model grid where the +help=The latitude, longitude coordinate in the model coordinate system where the cross section will start (i.e. the furthest left hand point of the plot, where the x axis is distance along transect, and the y axis is model level). type=real,real @@ -243,7 +243,7 @@ sort-key=2modellevel6 [template variables=MLEVEL_TRANSECT_FINISHCOORDS] ns=Diagnostics/Quicklook description=Finish latitude, longitude of the cross section. -help=The latitude, longitude coordinate with respect to the model grid where the +help=The latitude, longitude coordinate in the model coordinate system where the cross section will finish (i.e. the furthest right hand point of the plot, where the x axis is distance along transect, and the y axis is model level). type=real,real From 7669e5e1cc703c80b31f12b14d59de69d7450ddb Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 05:38:06 +0100 Subject: [PATCH 70/90] Default to False when setting doesn't exist This is useful if the setting is not in the rose-suite.conf. --- cset-workflow/flow.cylc | 36 ++++++++++--------- cset-workflow/includes/basic_qq_plot.cylc | 2 +- ...deterministic_domain_histogram_series.cylc | 2 +- ...istic_domain_mean_surface_time_series.cylc | 2 +- ...n_vertical_model_level_profile_series.cylc | 2 +- ...mean_vertical_pressure_profile_series.cylc | 2 +- ...istic_domain_surface_histogram_series.cylc | 2 +- .../deterministic_plot_cape_ratio.cylc | 2 +- .../deterministic_plot_inflow_properties.cylc | 2 +- ...stic_single_point_surface_time_series.cylc | 2 +- cset-workflow/includes/metplus_grid_stat.cylc | 2 +- cset-workflow/includes/mlevel_transect.cylc | 2 +- cset-workflow/includes/plevel_transect.cylc | 2 +- .../plot_spatial_mlevel_model_field.cylc | 2 +- .../plot_spatial_plevel_model_field.cylc | 2 +- .../plot_spatial_surface_model_field.cylc | 2 +- cset-workflow/includes/point_stat.cylc | 2 +- 17 files changed, 35 insertions(+), 33 deletions(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index 3eb8b923a..3fa42417a 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -17,7 +17,7 @@ URL = https://metoffice.github.io/CSET {% elif CSET_CYCLING_MODE == "trial" %} initial cycle point = {{CSET_TRIAL_START_DATE}} # End date can be blank.
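# The Jinja2 default filter makes this robust to unset variables:
# CSET_TRIAL_END_DATE|default(False) evaluates to False when the setting
# is absent from rose-suite.conf, rather than raising an undefined
# variable error while the workflow is parsed.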
- {% if CSET_TRIAL_END_DATE %} + {% if CSET_TRIAL_END_DATE|default(False) %} final cycle point = {{CSET_TRIAL_END_DATE}} {% endif %} {% endif %} @@ -46,12 +46,12 @@ URL = https://metoffice.github.io/CSET # Can only run tasks on final cycle point if it exists, so skip for # continuous trials. - {% if CSET_CYCLING_MODE != "trial" or CSET_TRIAL_END_DATE %} - # Only runs on the final cycle. - R1/$ = """ - housekeeping_raw => finish_website => send_email - housekeeping_raw => housekeeping_full - """ + {% if CSET_CYCLING_MODE != "trial" or CSET_TRIAL_END_DATE|default(False) %} + # Only runs on the final cycle. + R1/$ = """ + housekeeping_raw => finish_website => send_email + housekeeping_raw => housekeeping_full + """ {% endif %} [runtime] @@ -59,20 +59,22 @@ URL = https://metoffice.github.io/CSET script = rose task-run -v execution time limit = PT15M [[[environment]]] - CSET_ENV_USE_MODULES = {{CSET_ENV_USE_MODULES}} - {% if CSET_ENV_USE_MODULES %} + # As these variables are used in the environment script, they must be + # defined. + CSET_ENV_USE_MODULES = {{CSET_ENV_USE_MODULES|default(False)}} + {% if CSET_ENV_USE_MODULES|default(False) %} MODULES_LIST = {{MODULES_LIST}} MODULES_PURGE = {{MODULES_PURGE}} {% endif %} - CSET_ENV_USE_CONDA = {{CSET_ENV_USE_CONDA}} - {% if CSET_ENV_USE_CONDA %} + CSET_ENV_USE_CONDA = {{CSET_ENV_USE_CONDA|default(False)}} + {% if CSET_ENV_USE_CONDA|default(False) %} CONDA_PATH = {{CONDA_PATH}} CONDA_VENV_LOCATION = {{CONDA_VENV_LOCATION}} {% endif %} - CSET_ENV_SEPARATE_MET = {{CSET_ENV_SEPARATE_MET}} - {% if CSET_ENV_SEPARATE_MET %} + CSET_ENV_SEPARATE_MET = {{CSET_ENV_SEPARATE_MET|default(False)}} + {% if CSET_ENV_SEPARATE_MET|default(False) %} CONDA_METPLUS_VENV_LOCATION = {{CONDA_METPLUS_VENV_LOCATION}} MET_INSTALL_DIR = {{MET_INSTALL_DIR}} MET_BUILD_BASE = {{MET_BUILD_BASE}} @@ -82,7 +84,7 @@ URL = https://metoffice.github.io/CSET LOGLEVEL = {{LOGLEVEL}} COLORBAR_FILE = {{COLORBAR_FILE}} - PLOT_RESOLUTION = {{PLOT_RESOLUTION}} + PLOT_RESOLUTION = {{PLOT_RESOLUTION|default(100)}} [[PROCESS]] script = rose task-run -v --app-key=run_cset_recipe @@ -97,7 +99,7 @@ URL = https://metoffice.github.io/CSET [[METPLUS]] [[[environment]]] - {% if METPLUS_GRID_STAT %} + {% if METPLUS_GRID_STAT|default(False) %} METPLUS_ANA_DIR = {{METPLUS_ANA_DIR}} METPLUS_FCST_DIR = {{METPLUS_FCST_DIR}} METPLUS_OBS_DIR = {{METPLUS_OBS_DIR}} @@ -121,8 +123,8 @@ URL = https://metoffice.github.io/CSET execution time limit = PT30M [[[environment]]] CONDA_VENV_CREATE = {{CONDA_VENV_CREATE}} - CSET_ENV_USE_LOCAL_CSET = {{CSET_ENV_USE_LOCAL_CSET}} - {% if CSET_ENV_USE_LOCAL_CSET %} + CSET_ENV_USE_LOCAL_CSET = {{CSET_ENV_USE_LOCAL_CSET|default(False) }} + {% if CSET_ENV_USE_LOCAL_CSET|default(False) %} CSET_LOCAL_CSET_PATH = {{CSET_LOCAL_CSET_PATH}} {% endif %} diff --git a/cset-workflow/includes/basic_qq_plot.cylc b/cset-workflow/includes/basic_qq_plot.cylc index 340349aa6..ede7eebf5 100644 --- a/cset-workflow/includes/basic_qq_plot.cylc +++ b/cset-workflow/includes/basic_qq_plot.cylc @@ -1,4 +1,4 @@ -{% if BASIC_QQ_PLOT %} +{% if BASIC_QQ_PLOT|default(False) %} [runtime] {% for model_field_A, model_field_B, coord_A, coord_B in zip(MODEL_FIELDS_A, MODEL_FIELDS_B,VERTICAL_COORDINATE_A, VERTICAL_COORDINATE_B) %} [[generic_basic_qq_plot_{{model_field_A}}_{{model_field_B}}_parallel]] diff --git a/cset-workflow/includes/deterministic_domain_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_histogram_series.cylc index 6529bb00a..77e61a414 100644 --- 
a/cset-workflow/includes/deterministic_domain_histogram_series.cylc +++ b/cset-workflow/includes/deterministic_domain_histogram_series.cylc @@ -1,4 +1,4 @@ -{% if DOMAIN_HISTOGRAM_SERIES %} +{% if DOMAIN_HISTOGRAM_SERIES|default(False) %} {% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} {% for plevel in PRESSURE_LEVELS %} diff --git a/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc b/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc index b4ce78e67..014dd5da9 100644 --- a/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_surface_time_series.cylc @@ -1,4 +1,4 @@ -{% if DOMAIN_MEAN_SURFACE_TIME_SERIES %} +{% if DOMAIN_MEAN_SURFACE_TIME_SERIES|default(False) %} {% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc index 82aa47d4b..6379eaca7 100644 --- a/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_model_level_profile_series.cylc @@ -1,4 +1,4 @@ -{% if DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES %} +{% if DOMAIN_MEAN_VERTICAL_PROFILE_MODELLEVEL_SERIES|default(False) %} {% for equivalent_field in restructure_field_list(MODEL_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc b/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc index 615614086..452d897bc 100644 --- a/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc +++ b/cset-workflow/includes/deterministic_domain_mean_vertical_pressure_profile_series.cylc @@ -1,4 +1,4 @@ -{% if DOMAIN_MEAN_VERTICAL_PROFILE_SERIES %} +{% if DOMAIN_MEAN_VERTICAL_PROFILE_SERIES|default(False) %} {% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc b/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc index 9d1a638e2..bb0f22048 100644 --- a/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc +++ b/cset-workflow/includes/deterministic_domain_surface_histogram_series.cylc @@ -1,4 +1,4 @@ -{% if DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD %} +{% if DOMAIN_SURFACE_HISTOGRAM_SERIES_FIELD|default(False) %} {% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/deterministic_plot_cape_ratio.cylc b/cset-workflow/includes/deterministic_plot_cape_ratio.cylc index ee0f0be65..60a8f79da 100644 --- a/cset-workflow/includes/deterministic_plot_cape_ratio.cylc +++ b/cset-workflow/includes/deterministic_plot_cape_ratio.cylc @@ -1,4 +1,4 @@ -{% if DETERMINISTIC_PLOT_CAPE_RATIO %} +{% if DETERMINISTIC_PLOT_CAPE_RATIO|default(False) %} {% for model in models %} [runtime] [[plot_cape_ratio_m{{model["number"]}}]] diff --git 
a/cset-workflow/includes/deterministic_plot_inflow_properties.cylc b/cset-workflow/includes/deterministic_plot_inflow_properties.cylc index 9593172b7..7c3481429 100644 --- a/cset-workflow/includes/deterministic_plot_inflow_properties.cylc +++ b/cset-workflow/includes/deterministic_plot_inflow_properties.cylc @@ -1,4 +1,4 @@ -{% if DETERMINISTIC_PLOT_INFLOW_PROPERTIES %} +{% if DETERMINISTIC_PLOT_INFLOW_PROPERTIES|default(False) %} {% for model in models %} [runtime] [[inflow_layer_properties_plot_m{{model["number"]}}]] diff --git a/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc b/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc index 87d53ef6b..7745c33d6 100644 --- a/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc +++ b/cset-workflow/includes/deterministic_single_point_surface_time_series.cylc @@ -1,4 +1,4 @@ -{% if SURFACE_SINGLE_POINT_TIME_SERIES %} +{% if SURFACE_SINGLE_POINT_TIME_SERIES|default(False) %} {% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/metplus_grid_stat.cylc b/cset-workflow/includes/metplus_grid_stat.cylc index dc361d937..c08182fb1 100644 --- a/cset-workflow/includes/metplus_grid_stat.cylc +++ b/cset-workflow/includes/metplus_grid_stat.cylc @@ -1,4 +1,4 @@ -{% if METPLUS_GRID_STAT %} +{% if METPLUS_GRID_STAT|default(False) %} [scheduling] [[graph]] {{CSET_CYCLE_PERIOD}} = """ diff --git a/cset-workflow/includes/mlevel_transect.cylc b/cset-workflow/includes/mlevel_transect.cylc index bfc1b0839..cb4859952 100644 --- a/cset-workflow/includes/mlevel_transect.cylc +++ b/cset-workflow/includes/mlevel_transect.cylc @@ -1,4 +1,4 @@ -{% if EXTRACT_MLEVEL_TRANSECT %} +{% if EXTRACT_MLEVEL_TRANSECT|default(False) %} {% for equivalent_field in restructure_field_list(MODEL_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/plevel_transect.cylc b/cset-workflow/includes/plevel_transect.cylc index 4e4dd78a0..7c04144c0 100644 --- a/cset-workflow/includes/plevel_transect.cylc +++ b/cset-workflow/includes/plevel_transect.cylc @@ -1,4 +1,4 @@ -{% if EXTRACT_PLEVEL_TRANSECT %} +{% if EXTRACT_PLEVEL_TRANSECT|default(False) %} {% for equivalent_field in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc b/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc index 9f1a48bde..f29e0a1ea 100644 --- a/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_mlevel_model_field.cylc @@ -1,4 +1,4 @@ -{% if PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD %} +{% if PLOT_SPATIAL_MODEL_LEVEL_MODEL_FIELD|default(False) %} {% for equivalent_field in restructure_field_list(MODEL_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} {% for mlevel in UM_MODEL_LEVELS %} diff --git a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc index 1f370d3ba..18b85b7d4 100644 --- a/cset-workflow/includes/plot_spatial_plevel_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_plevel_model_field.cylc @@ -1,4 +1,4 @@ -{% if PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD %} +{% if PLOT_SPATIAL_PRESSURE_LEVEL_MODEL_FIELD|default(False) %} {% for equivalent_field 
in restructure_field_list(PRESSURE_LEVEL_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} {% for plevel in PRESSURE_LEVELS %} diff --git a/cset-workflow/includes/plot_spatial_surface_model_field.cylc b/cset-workflow/includes/plot_spatial_surface_model_field.cylc index 8c4f481a2..4771b8fed 100644 --- a/cset-workflow/includes/plot_spatial_surface_model_field.cylc +++ b/cset-workflow/includes/plot_spatial_surface_model_field.cylc @@ -1,4 +1,4 @@ -{% if PLOT_SPATIAL_SURFACE_MODEL_FIELD %} +{% if PLOT_SPATIAL_SURFACE_MODEL_FIELD|default(False) %} {% for equivalent_field in restructure_field_list(SURFACE_MODEL_FIELDS) %} {% for model_number, field in equivalent_field.items() %} [runtime] diff --git a/cset-workflow/includes/point_stat.cylc b/cset-workflow/includes/point_stat.cylc index 7540e18f9..19e03a034 100644 --- a/cset-workflow/includes/point_stat.cylc +++ b/cset-workflow/includes/point_stat.cylc @@ -1,4 +1,4 @@ -{% if METPLUS_POINT_STAT %} +{% if METPLUS_POINT_STAT|default(False) %} [scheduling] [[graph]] {{CSET_CYCLE_PERIOD}} = """ From ab490eb1e66fac0b05bc8e3223f53a8020812c50 Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 06:09:32 +0100 Subject: [PATCH 71/90] Make runahead limit an optional configuration --- cset-workflow/flow.cylc | 4 ++++ cset-workflow/meta/rose-meta.conf | 4 +++- cset-workflow/rose-suite.conf.example | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index 3fa42417a..6a80f94ed 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -8,7 +8,11 @@ URL = https://metoffice.github.io/CSET {% set models = get_models(ROSE_SUITE_VARIABLES) %} [scheduling] + # There is rarely a reason to manually set the runahead limit, as we will + # usually be limited by number of concurrent jobs. + {% if CSET_RUNAHEAD_LIMIT|default(False) %} runahead limit = P{{CSET_RUNAHEAD_LIMIT}} + {% endif %} # Initial and final cycle points cover the entire period of interest. {% if CSET_CYCLING_MODE == "case_study" %} diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 979cf8657..d76a7eb70 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -95,9 +95,11 @@ help=The maximum number of cycles run in parallel. A larger number here will finish quicker, but utilise more compute resources at once. For a large enough workflow it may overwhelm the batch submission system, so it is recommended to keep this below 10. + + As we will usually be constrained by the maximum number of concurrent jobs, + it is best to leave this undefined and use the default. type=integer range=0: -compulsory=true ################################################################################ diff --git a/cset-workflow/rose-suite.conf.example b/cset-workflow/rose-suite.conf.example index 32717b2e0..7f962ff5b 100644 --- a/cset-workflow/rose-suite.conf.example +++ b/cset-workflow/rose-suite.conf.example @@ -16,7 +16,7 @@ CSET_ENV_USE_LOCAL_CSET=False CSET_ENV_USE_MODULES=False !!CSET_LOCAL_CSET_PATH="" CSET_MODEL_COUNT=1 -CSET_RUNAHEAD_LIMIT=5
+!CSET_RUNAHEAD_LIMIT=0 !!CSET_TRIAL_CYCLE_PERIOD="" !!CSET_TRIAL_END_DATE="" !!CSET_TRIAL_START_DATE="" From d51a952dfb617b3b0a2a60575a2dcb5cbf7711de Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 06:10:09 +0100 Subject: [PATCH 72/90] Add some comments to flow.cylc --- cset-workflow/flow.cylc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index 6a80f94ed..bf276d4f0 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -4,9 +4,12 @@ title = CSET description = Workflow for running CSET. URL = https://metoffice.github.io/CSET +# Import all of our Jinja utilities for use in the workflow. {% from "jinja_utils" import get_models, glob, max, min, zip, restructure_field_list, sanitise_task_name %} +# Load a list of model detail dictionaries. {% set models = get_models(ROSE_SUITE_VARIABLES) %} + [scheduling] # There is rarely a reason to manually set the runahead limit, as we will # usually be limited by number of concurrent jobs. {% if CSET_RUNAHEAD_LIMIT|default(False) %} ... From 8404892f0bc809c9b55e5ca5c14692613d89690f Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 06:17:29 +0100 Subject: [PATCH 73/90] Indent jinja code for readability --- cset-workflow/flow.cylc | 70 ++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index bf276d4f0..3282fd0b1 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -14,19 +14,19 @@ URL = https://metoffice.github.io/CSET # There is rarely a reason to manually set the runahead limit, as we will # usually be limited by number of concurrent jobs. {% if CSET_RUNAHEAD_LIMIT|default(False) %} - runahead limit = P{{CSET_RUNAHEAD_LIMIT}} + runahead limit = P{{CSET_RUNAHEAD_LIMIT}} {% endif %} # Initial and final cycle points cover the entire period of interest. {% if CSET_CYCLING_MODE == "case_study" %} - initial cycle point = {{ min(CSET_CASE_DATES) }} - final cycle point = {{ max(CSET_CASE_DATES) }} + initial cycle point = {{ min(CSET_CASE_DATES) }} + final cycle point = {{ max(CSET_CASE_DATES) }} {% elif CSET_CYCLING_MODE == "trial" %} - initial cycle point = {{CSET_TRIAL_START_DATE}} - # Analyse from each forecast.
- {{CSET_TRIAL_CYCLE_PERIOD}} = """ - setup_complete[^] => FETCH_DATA:succeed-all => fetch_complete - fetch_complete => PROCESS:finish-all => housekeeping_raw - """ + # Analyse from each forecast. + {{CSET_TRIAL_CYCLE_PERIOD}} = """ + setup_complete[^] => FETCH_DATA:succeed-all => fetch_complete + fetch_complete => PROCESS:finish-all => housekeeping_raw + """ {% endif %} # Can only run tasks on final cycle point if it exists, so skip for @@ -71,23 +71,23 @@ URL = https://metoffice.github.io/CSET # defined. CSET_ENV_USE_MODULES = {{CSET_ENV_USE_MODULES|default(False)}} {% if CSET_ENV_USE_MODULES|default(False) %} - MODULES_LIST = {{MODULES_LIST}} - MODULES_PURGE = {{MODULES_PURGE}} + MODULES_LIST = {{MODULES_LIST}} + MODULES_PURGE = {{MODULES_PURGE}} {% endif %} CSET_ENV_USE_CONDA = {{CSET_ENV_USE_CONDA|default(False)}} {% if CSET_ENV_USE_CONDA|default(False) %} - CONDA_PATH = {{CONDA_PATH}} - CONDA_VENV_LOCATION = {{CONDA_VENV_LOCATION}} + CONDA_PATH = {{CONDA_PATH}} + CONDA_VENV_LOCATION = {{CONDA_VENV_LOCATION}} {% endif %} CSET_ENV_SEPARATE_MET = {{CSET_ENV_SEPARATE_MET|default(False)}} {% if CSET_ENV_SEPARATE_MET|default(False) %} - CONDA_METPLUS_VENV_LOCATION = {{CONDA_METPLUS_VENV_LOCATION}} - MET_INSTALL_DIR = {{MET_INSTALL_DIR}} - MET_BUILD_BASE = {{MET_BUILD_BASE}} - METPLUS_BASE = {{METPLUS_BASE}} - MET_LIBRARIES = {{MET_LIBRARIES}} + CONDA_METPLUS_VENV_LOCATION = {{CONDA_METPLUS_VENV_LOCATION}} + MET_INSTALL_DIR = {{MET_INSTALL_DIR}} + MET_BUILD_BASE = {{MET_BUILD_BASE}} + METPLUS_BASE = {{METPLUS_BASE}} + MET_LIBRARIES = {{MET_LIBRARIES}} {% endif %} LOGLEVEL = {{LOGLEVEL}} @@ -108,10 +108,10 @@ URL = https://metoffice.github.io/CSET [[METPLUS]] [[[environment]]] {% if METPLUS_GRID_STAT|default(False) %} - METPLUS_ANA_DIR = {{METPLUS_ANA_DIR}} - METPLUS_FCST_DIR = {{METPLUS_FCST_DIR}} - METPLUS_OBS_DIR = {{METPLUS_OBS_DIR}} - ROSE_APP_OPT_CONF_KEYS = {{METPLUS_OPT_CONFIG_KEYS}} + METPLUS_ANA_DIR = {{METPLUS_ANA_DIR}} + METPLUS_FCST_DIR = {{METPLUS_FCST_DIR}} + METPLUS_OBS_DIR = {{METPLUS_OBS_DIR}} + ROSE_APP_OPT_CONF_KEYS = {{METPLUS_OPT_CONFIG_KEYS}} {% endif %} [[DUMMY_TASK]] @@ -133,7 +133,7 @@ URL = https://metoffice.github.io/CSET CONDA_VENV_CREATE = {{CONDA_VENV_CREATE}} CSET_ENV_USE_LOCAL_CSET = {{CSET_ENV_USE_LOCAL_CSET|default(False) }} {% if CSET_ENV_USE_LOCAL_CSET|default(False) %} - CSET_LOCAL_CSET_PATH = {{CSET_LOCAL_CSET_PATH}} + CSET_LOCAL_CSET_PATH = {{CSET_LOCAL_CSET_PATH}} {% endif %} [[install_website_skeleton]] @@ -151,7 +151,7 @@ URL = https://metoffice.github.io/CSET DATA_PATH = {{model["data_path"]}} DATE_TYPE = {{model["date_type"]}} {% if model["date_type"] != "initiation" %} - DATA_PERIOD = {{model["data_period"]}} + DATA_PERIOD = {{model["data_period"]}} {% endif %} {% endfor %} From c1be5f4c80a420eeeb844317928645bac8407715 Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 07:37:40 +0100 Subject: [PATCH 74/90] Note that preprocessing is not yet implemented --- cset-workflow/meta/rose-meta.conf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index d76a7eb70..a33a39ca4 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -528,7 +528,7 @@ trigger=template variables=m01_name: this >= 1; # [template variables=m??_preprocessing] # ns=Models and Cases/Model ?? # title=Preprocess -# description=Preprocess all of the model data. +# description=Preprocess all of the model data. 
NOTE: Not yet implemented. # help=Whether to preprocess all of the model data. This is useful for applying # any necessary transformations to the data before it is used in the workflow, # such as removing boundary regions. @@ -634,7 +634,7 @@ sort-key=b2 [template variables=m01_preprocessing] ns=Models and Cases/Model 01 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -740,7 +740,7 @@ sort-key=b2 [template variables=m02_preprocessing] ns=Models and Cases/Model 02 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -846,7 +846,7 @@ sort-key=b2 [template variables=m03_preprocessing] ns=Models and Cases/Model 03 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -952,7 +952,7 @@ sort-key=b2 [template variables=m04_preprocessing] ns=Models and Cases/Model 04 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -1058,7 +1058,7 @@ sort-key=b2 [template variables=m05_preprocessing] ns=Models and Cases/Model 05 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -1164,7 +1164,7 @@ sort-key=b2 [template variables=m06_preprocessing] ns=Models and Cases/Model 06 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -1270,7 +1270,7 @@ sort-key=b2 [template variables=m07_preprocessing] ns=Models and Cases/Model 07 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -1376,7 +1376,7 @@ sort-key=b2 [template variables=m08_preprocessing] ns=Models and Cases/Model 08 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. 
This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -1482,7 +1482,7 @@ sort-key=b2 [template variables=m09_preprocessing] ns=Models and Cases/Model 09 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. @@ -1588,7 +1588,7 @@ sort-key=b2 [template variables=m10_preprocessing] ns=Models and Cases/Model 10 title=Preprocess -description=Preprocess all of the model data. +description=Preprocess all of the model data. NOTE: Not yet implemented. help=Whether to preprocess all of the model data. This is useful for applying any necessary transformations to the data before it is used in the workflow, such as removing boundary regions. From f101c970acad6d37562020e7901222635a38849b Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 22:49:45 +0100 Subject: [PATCH 75/90] Put all fetch_fcst options directly into rose-app.conf --- cset-workflow/app/fetch_fcst/opt/rose-app-filesystem.conf | 2 -- cset-workflow/app/fetch_fcst/opt/rose-app-http.conf | 2 -- cset-workflow/app/fetch_fcst/opt/rose-app-mass.conf | 2 -- cset-workflow/app/fetch_fcst/rose-app.conf | 5 ++++- cset-workflow/flow.cylc | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) delete mode 100644 cset-workflow/app/fetch_fcst/opt/rose-app-filesystem.conf delete mode 100644 cset-workflow/app/fetch_fcst/opt/rose-app-http.conf delete mode 100644 cset-workflow/app/fetch_fcst/opt/rose-app-mass.conf diff --git a/cset-workflow/app/fetch_fcst/opt/rose-app-filesystem.conf b/cset-workflow/app/fetch_fcst/opt/rose-app-filesystem.conf deleted file mode 100644 index 824320295..000000000 --- a/cset-workflow/app/fetch_fcst/opt/rose-app-filesystem.conf +++ /dev/null @@ -1,2 +0,0 @@ -[command] -default=app_env_wrapper fetch-data-filesystem.py diff --git a/cset-workflow/app/fetch_fcst/opt/rose-app-http.conf b/cset-workflow/app/fetch_fcst/opt/rose-app-http.conf deleted file mode 100644 index 5a84f0c97..000000000 --- a/cset-workflow/app/fetch_fcst/opt/rose-app-http.conf +++ /dev/null @@ -1,2 +0,0 @@ -[command] -default=app_env_wrapper fetch-data-http.py diff --git a/cset-workflow/app/fetch_fcst/opt/rose-app-mass.conf b/cset-workflow/app/fetch_fcst/opt/rose-app-mass.conf deleted file mode 100644 index 41bec033d..000000000 --- a/cset-workflow/app/fetch_fcst/opt/rose-app-mass.conf +++ /dev/null @@ -1,2 +0,0 @@ -[command] -default=app_env_wrapper restricted-fetch-data-mass.py diff --git a/cset-workflow/app/fetch_fcst/rose-app.conf b/cset-workflow/app/fetch_fcst/rose-app.conf index 9ec23ed3d..dcd63246b 100644 --- a/cset-workflow/app/fetch_fcst/rose-app.conf +++ b/cset-workflow/app/fetch_fcst/rose-app.conf @@ -1,2 +1,5 @@ [command] -default=echo "Please set FETCH_FCST_OPT_CONF to your storage system."; false +default=echo "Please set ROSE_APP_COMMAND_KEY to your storage system."; false +filesystem=app_env_wrapper fetch-data-filesystem.py +http=app_env_wrapper fetch-data-http.py +mass=app_env_wrapper restricted-fetch-data-mass.py diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index 3282fd0b1..42f61c8bc 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -147,7 +147,7 @@ URL = https://metoffice.github.io/CSET inherit = FETCH_DATA [[[environment]]] MODEL_NUMBER = 
{{model["number"]}} - ROSE_APP_OPT_CONF_KEYS = {{model["data_source"]}} + ROSE_APP_COMMAND_KEY = {{model["data_source"]}} DATA_PATH = {{model["data_path"]}} DATE_TYPE = {{model["date_type"]}} {% if model["date_type"] != "initiation" %} From 1ce5f15f87aa5c152ac3c84e28af1c0512383cb4 Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 23:39:49 +0100 Subject: [PATCH 76/90] Move HTTPFileRetriever into fetch_data module Also add a test. --- .../app/fetch_fcst/bin/fetch-data-http.py | 28 +------------------ src/CSET/_workflow_utils/fetch_data.py | 28 ++++++++++++++++++- tests/workflow_utils/test_fetch_data.py | 14 ++++++++++ 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py index 154504e01..39085bb44 100644 --- a/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py +++ b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py @@ -2,32 +2,6 @@ """Retrieve files via HTTP.""" -import ssl -import urllib.parse -import urllib.request - -from CSET._workflow_utils.fetch_data import FileRetrieverABC, fetch_data - - -class HTTPFileRetriever(FileRetrieverABC): - """Retrieve files via HTTP.""" - - def get_file(self, file_path: str, output_dir: str) -> None: - """Save a file from a HTTP address to the output directory. - - Parameters - ---------- - file_path: str - Path of the file to copy on MASS. It may contain patterns - like globs, which will be expanded in a system specific manner. - output_dir: str - Path to filesystem directory into which the file should be copied. - """ - ctx = ssl.create_default_context() - save_path = urllib.parse.urlparse(file_path).path.split("/")[-1] - with urllib.request.urlopen(file_path, output_dir, context=ctx) as response: - with open(save_path, "wb") as fp: - fp.write(response.read()) - +from CSET._workflow_utils.fetch_data import HTTPFileRetriever, fetch_data fetch_data(HTTPFileRetriever) diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index 8392cb835..ce4ecb71e 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -7,7 +7,10 @@ import logging import os import shutil +import ssl import sys +import urllib.parse +import urllib.request from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta from typing import Literal @@ -20,7 +23,7 @@ class FileRetrieverABC(abc.ABC): - """Abstract class for retrieving files from a data source. + """Abstract base class for retrieving files from a data source. The `get_file` method must be defined. Optionally the __enter__ and __exit__ methods maybe be overridden to add setup or cleanup code. @@ -86,6 +89,29 @@ def get_file(self, file_path: str, output_dir: str) -> None: logging.warning("Failed to copy %s, error: %s", file, err) +class HTTPFileRetriever(FileRetrieverABC): + """Retrieve files via HTTP.""" + + def get_file(self, file_path: str, output_dir: str) -> None: + """Save a file from a HTTP address to the output directory. + + Parameters + ---------- + file_path: str + Path of the file to copy on MASS. It may contain patterns like + globs, which will be expanded in a system specific manner. + output_dir: str + Path to filesystem directory into which the file should be copied. 
+ """ + ctx = ssl.create_default_context() + save_path = urllib.parse.urlparse(file_path).path.split("/")[-1] + with urllib.request.urlopen(file_path, output_dir, context=ctx) as response: + with open(save_path, "wb") as fp: + # Read in 1 MiB chunks so data doesn't all have to be in memory. + while data := response.read(1024 * 1024): + fp.write(data) + + def _get_needed_environment_variables() -> dict: """Load the needed variables from the environment.""" # Python 3.10 and older don't fully support ISO 8601 datetime formats. diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index 6fbe55779..e7b000504 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -15,6 +15,7 @@ """Tests for fetch_data workflow utility.""" import datetime +import hashlib from pathlib import Path import pytest @@ -186,3 +187,16 @@ def test_FilesystemFileRetriever_copy_error(caplog): log_record = caplog.records[0] assert log_record.levelname == "WARNING" assert log_record.message.startswith("Failed to copy") + + +def test_HTTPFileRetriever(tmp_path): + """Test retrieving a file via HTTP.""" + url = "https://github.com/MetOffice/CSET/raw/48dc1d29846604aacb8d370b82bca31405931c87/tests/test_data/exeter_em01.nc" + with fetch_data.HTTPFileRetriever() as hfr: + hfr.get_file(url, str(tmp_path)) + file = tmp_path / "exeter_em01.nc" + assert file.is_file() + # Check file hash is correct, indicating a non-corrupt download. + expected_hash = "67899970eeca75b9378f0275ce86db3d1d613f2bc7a178540912848dc8a69ca7" + actual_hash = hashlib.sha256(file.read_bytes()).hexdigest() + assert actual_hash == expected_hash From acc401cda5b03150ddef22c8a083275b4243cfd9 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 00:08:45 +0100 Subject: [PATCH 77/90] Fix HTTPFileRetriever The output_dir was in the wrong place, so it was making a POST request. --- src/CSET/_workflow_utils/fetch_data.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index ce4ecb71e..a11c10442 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -104,8 +104,11 @@ def get_file(self, file_path: str, output_dir: str) -> None: Path to filesystem directory into which the file should be copied. """ ctx = ssl.create_default_context() - save_path = urllib.parse.urlparse(file_path).path.split("/")[-1] - with urllib.request.urlopen(file_path, output_dir, context=ctx) as response: + save_path = ( + f"{output_dir.removesuffix('/')}/" + + urllib.parse.urlparse(file_path).path.split("/")[-1] + ) + with urllib.request.urlopen(file_path, context=ctx) as response: with open(save_path, "wb") as fp: # Read in 1 MiB chunks so data doesn't all have to be in memory. 
while data := response.read(1024 * 1024): From a0aff827895750a773f85e6ee74001610963506a Mon Sep 17 00:00:00 2001 From: James Frost Date: Mon, 16 Sep 2024 23:42:43 +0100 Subject: [PATCH 78/90] Simplify type signature --- src/CSET/operators/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py index 823bb5174..ff1648666 100644 --- a/src/CSET/operators/plot.py +++ b/src/CSET/operators/plot.py @@ -1069,7 +1069,7 @@ def scatter_plot( filename: str = None, one_to_one: bool = True, **kwargs, -) -> iris.cube.CubeList[iris.cube.Cube, iris.cube.Cube]: +) -> iris.cube.CubeList: """Plot a scatter plot between two variables. Both cubes must be 1D. From e77ab072336b9af74148ef072c09d426d1b97f48 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 00:47:21 +0100 Subject: [PATCH 79/90] Add a timeout to HTTP retriever --- pyproject.toml | 1 + src/CSET/_workflow_utils/fetch_data.py | 15 ++++++++++----- tests/workflow_utils/test_fetch_data.py | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 41b13ce24..42ad5a97a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ filterwarnings = [ # first CI run on each branch. "ignore::cartopy.io.DownloadWarning", ] +markers = ["network: marks tests that use external network resources"] minversion = "7" pythonpath = ["src"] testpaths = ["tests"] diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index a11c10442..02e9e207a 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -108,11 +108,16 @@ def get_file(self, file_path: str, output_dir: str) -> None: f"{output_dir.removesuffix('/')}/" + urllib.parse.urlparse(file_path).path.split("/")[-1] ) - with urllib.request.urlopen(file_path, context=ctx) as response: - with open(save_path, "wb") as fp: - # Read in 1 MiB chunks so data doesn't all have to be in memory. - while data := response.read(1024 * 1024): - fp.write(data) + try: + with urllib.request.urlopen(file_path, timeout=30, context=ctx) as response: + if response.status != 200: + raise OSError(f"Cannot retrieve URL: {response.status}") + with open(save_path, "wb") as fp: + # Read in 1 MiB chunks so data needn't fit in memory. 
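+ # The 30 second timeout above applies to each blocking socket
+ # operation (the connect and individual reads), not the whole download.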
+ while data := response.read(1024 * 1024): + fp.write(data) + except OSError as err: + logging.warning("Failed to retrieve %s, error: %s", file_path, err) def _get_needed_environment_variables() -> dict: diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index e7b000504..efb2d40e5 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -189,6 +189,7 @@ def test_FilesystemFileRetriever_copy_error(caplog): assert log_record.message.startswith("Failed to copy") +@pytest.mark.network def test_HTTPFileRetriever(tmp_path): """Test retrieving a file via HTTP.""" url = "https://github.com/MetOffice/CSET/raw/48dc1d29846604aacb8d370b82bca31405931c87/tests/test_data/exeter_em01.nc" @@ -200,3 +201,16 @@ def test_HTTPFileRetriever(tmp_path): expected_hash = "67899970eeca75b9378f0275ce86db3d1d613f2bc7a178540912848dc8a69ca7" actual_hash = hashlib.sha256(file.read_bytes()).hexdigest() assert actual_hash == expected_hash + + +@pytest.mark.network +def test_HTTPFileRetriever_no_files(tmp_path, caplog): + """Test warning rather than error when requested URL does not exist.""" + with fetch_data.HTTPFileRetriever() as ffr: + # Should warn, but not error. + ffr.get_file("https://www.metoffice.gov.uk/CSET-404-testing", str(tmp_path)) + log_record = caplog.records[0] + assert log_record.levelname == "WARNING" + assert log_record.message.startswith( + "Failed to retrieve https://www.metoffice.gov.uk/CSET-404-testing, error:" + ) From 671d58f60d1821d8af7c5fcdd7ead12760178e91 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 02:33:44 +0100 Subject: [PATCH 80/90] Test handling of missing data period env var --- tests/workflow_utils/test_fetch_data.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index efb2d40e5..b403c03c4 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -55,14 +55,33 @@ def test_get_needed_environment_variables(monkeypatch): actual = fetch_data._get_needed_environment_variables() assert actual == expected, "Unexpected values from reading environment variables" + +def test_get_needed_environment_variables_data_period_handling(monkeypatch): + """Handle data_period dependent on date type.""" + duration_raw = "PT1H" + date_raw = "20000101T0000Z" + path = "/path/to/data" + number_raw = "1" + + monkeypatch.setenv("CSET_ANALYSIS_OFFSET", duration_raw) + monkeypatch.setenv("CSET_ANALYSIS_PERIOD", duration_raw) + monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", date_raw) + monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", path) + monkeypatch.setenv("DATA_PATH", path) + monkeypatch.setenv("MODEL_NUMBER", number_raw) + # Check DATA_PERIOD is not there for initiation. monkeypatch.setenv("DATE_TYPE", "initiation") - monkeypatch.delenv("DATA_PERIOD") initiation_actual = fetch_data._get_needed_environment_variables() assert ( initiation_actual["data_period"] is None ), "data_period should not be set for initiation time" + # Check exception when data period is not specified for validity time. 
+ monkeypatch.setenv("DATE_TYPE", "validity") + with pytest.raises(KeyError): + fetch_data._get_needed_environment_variables() + def test_fetch_data(monkeypatch, tmp_path): """Test top-level fetch_data function with other calls mocked out.""" From 725e38894b643499093cf80501d8dc1b53ca061c Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 02:40:13 +0100 Subject: [PATCH 81/90] Use httpbin.org as the MO website just blocks the response --- tests/workflow_utils/test_fetch_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index b403c03c4..479288e82 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -227,9 +227,9 @@ def test_HTTPFileRetriever_no_files(tmp_path, caplog): """Test warning rather than error when requested URL does not exist.""" with fetch_data.HTTPFileRetriever() as ffr: # Should warn, but not error. - ffr.get_file("https://www.metoffice.gov.uk/CSET-404-testing", str(tmp_path)) + ffr.get_file("http://httpbin.org/status/404", str(tmp_path)) log_record = caplog.records[0] assert log_record.levelname == "WARNING" assert log_record.message.startswith( - "Failed to retrieve https://www.metoffice.gov.uk/CSET-404-testing, error:" + "Failed to retrieve http://httpbin.org/status/404, error:" ) From 32b0952526ddb3698c2f3a81395d9b8ad41c303b Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 03:06:51 +0100 Subject: [PATCH 82/90] Use ROSE_DATAC to locate per-cycle data dir --- src/CSET/_workflow_utils/fetch_data.py | 15 ++++++--------- src/CSET/_workflow_utils/run_cset_recipe.py | 5 ++--- tests/workflow_utils/test_fetch_data.py | 18 ++++++++---------- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/CSET/_workflow_utils/fetch_data.py b/src/CSET/_workflow_utils/fetch_data.py index 02e9e207a..82a83f3e3 100755 --- a/src/CSET/_workflow_utils/fetch_data.py +++ b/src/CSET/_workflow_utils/fetch_data.py @@ -134,9 +134,8 @@ def _get_needed_environment_variables() -> dict: "data_time": _fromisoformat(os.environ["CYLC_TASK_CYCLE_POINT"]), "forecast_length": isodate.parse_duration(os.environ["CSET_ANALYSIS_PERIOD"]), "forecast_offset": isodate.parse_duration(os.environ["CSET_ANALYSIS_OFFSET"]), - "share_dir": os.environ["CYLC_WORKFLOW_SHARE_DIR"], - "cycle_point": os.environ["CYLC_TASK_CYCLE_POINT"], "model_number": os.environ["MODEL_NUMBER"], + "rose_datac": os.environ["ROSE_DATAC"], } try: variables["data_period"] = isodate.parse_duration(os.environ["DATA_PERIOD"]) @@ -200,11 +199,11 @@ def fetch_data(file_retriever: FileRetrieverABC = FilesystemFileRetriever): * CSET_ANALYSIS_OFFSET * CSET_ANALYSIS_PERIOD * CYLC_TASK_CYCLE_POINT - * CYLC_WORKFLOW_SHARE_DIR * DATA_PATH * DATA_PERIOD * DATE_TYPE * MODEL_NUMBER + * ROSE_DATAC Parameters ---------- @@ -214,11 +213,9 @@ def fetch_data(file_retriever: FileRetrieverABC = FilesystemFileRetriever): v = _get_needed_environment_variables() # Prepare output directory. - cycle_share_data_dir = ( - f"{v['share_dir']}/cycle/{v['cycle_point']}/data/{v['model_number']}" - ) - os.makedirs(cycle_share_data_dir, exist_ok=True) - logging.debug("Output directory: %s", cycle_share_data_dir) + cycle_data_dir = f"{v['rose_datac']}/data/{v['model_number']}" + os.makedirs(cycle_data_dir, exist_ok=True) + logging.debug("Output directory: %s", cycle_data_dir) # Get file paths. 
paths = _template_file_path( @@ -234,4 +231,4 @@ def fetch_data(file_retriever: FileRetrieverABC = FilesystemFileRetriever): # Use file retriever to transfer data with multiple threads. with file_retriever() as retriever, ThreadPoolExecutor() as executor: for path in paths: - executor.submit(retriever.get_file, path, cycle_share_data_dir) + executor.submit(retriever.get_file, path, cycle_data_dir) diff --git a/src/CSET/_workflow_utils/run_cset_recipe.py b/src/CSET/_workflow_utils/run_cset_recipe.py index fe97c3814..4e1595e29 100755 --- a/src/CSET/_workflow_utils/run_cset_recipe.py +++ b/src/CSET/_workflow_utils/run_cset_recipe.py @@ -65,10 +65,9 @@ def output_directory(): def data_directory(): """Get the input data directory for the cycle.""" - share_directory = os.environ["CYLC_WORKFLOW_SHARE_DIR"] - cycle_point = os.environ["CYLC_TASK_CYCLE_POINT"] + rose_datac = os.environ["ROSE_DATAC"] model_number = os.environ["MODEL_NUMBER"] - return f"{share_directory}/cycle/{cycle_point}/data/{model_number}" + return f"{rose_datac}/data/{model_number}" def create_diagnostic_archive(output_directory): diff --git a/tests/workflow_utils/test_fetch_data.py b/tests/workflow_utils/test_fetch_data.py index 479288e82..ffe9432fe 100644 --- a/tests/workflow_utils/test_fetch_data.py +++ b/tests/workflow_utils/test_fetch_data.py @@ -35,14 +35,13 @@ def test_get_needed_environment_variables(monkeypatch): monkeypatch.setenv("CSET_ANALYSIS_OFFSET", duration_raw) monkeypatch.setenv("CSET_ANALYSIS_PERIOD", duration_raw) monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", date_raw) - monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", path) monkeypatch.setenv("DATA_PATH", path) monkeypatch.setenv("DATA_PERIOD", duration_raw) monkeypatch.setenv("MODEL_NUMBER", number_raw) + monkeypatch.setenv("ROSE_DATAC", path) monkeypatch.setenv("DATE_TYPE", "validity") expected = { - "cycle_point": date_raw, "data_period": duration, "data_time": date, "date_type": "validity", @@ -50,7 +49,7 @@ def test_get_needed_environment_variables(monkeypatch): "forecast_offset": duration, "model_number": number_raw, "raw_path": path, - "share_dir": path, + "rose_datac": path, } actual = fetch_data._get_needed_environment_variables() assert actual == expected, "Unexpected values from reading environment variables" @@ -66,9 +65,9 @@ def test_get_needed_environment_variables_data_period_handling(monkeypatch): monkeypatch.setenv("CSET_ANALYSIS_OFFSET", duration_raw) monkeypatch.setenv("CSET_ANALYSIS_PERIOD", duration_raw) monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", date_raw) - monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", path) monkeypatch.setenv("DATA_PATH", path) monkeypatch.setenv("MODEL_NUMBER", number_raw) + monkeypatch.setenv("ROSE_DATAC", path) # Check DATA_PERIOD is not there for initiation. 
monkeypatch.setenv("DATE_TYPE", "initiation") @@ -88,15 +87,14 @@ def test_fetch_data(monkeypatch, tmp_path): def mock_get_needed_environment_variables(): return { - "share_dir": str(tmp_path), - "cycle_point": "20000101T0000Z", - "model_number": "1", - "raw_path": None, - "date_type": None, + "data_period": None, "data_time": None, + "date_type": None, "forecast_length": None, "forecast_offset": None, - "data_period": None, + "model_number": "1", + "raw_path": None, + "rose_datac": f"{tmp_path}/cycle/20000101T0000Z", } def mock_template_file_path(*args, **kwargs): From 86a7406f00d35e1c8a10b44d4b88ede0be90a30a Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 03:11:25 +0100 Subject: [PATCH 83/90] Remove extraneous ) --- cset-workflow/meta/diagnostics/rose-meta.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cset-workflow/meta/diagnostics/rose-meta.conf b/cset-workflow/meta/diagnostics/rose-meta.conf index 480e6a0b5..36370eed1 100644 --- a/cset-workflow/meta/diagnostics/rose-meta.conf +++ b/cset-workflow/meta/diagnostics/rose-meta.conf @@ -157,7 +157,7 @@ ns=Diagnostics/Quicklook description=Start latitude, longitude of the cross section. help=The latitude, longitude coordinate in the model coordinate system where the cross section will start i.e. the furthest left hand point of the plot, - where the x axis is distance along transect, and y axis is pressure level). + where the x axis is distance along transect, and y axis is pressure level. type=real,real compulsory=true sort-key=1pressure6 @@ -167,7 +167,7 @@ ns=Diagnostics/Quicklook description=Finish latitude, longitude of the cross section. help=The latitude, longitude coordinate in the model coordinate system where the cross section will finish i.e. the furthest right hand point of the plot, - where the x axis is distance along transect, and y axis is pressure level). + where the x axis is distance along transect, and y axis is pressure level. type=real,real compulsory=true sort-key=1pressure6 @@ -245,7 +245,7 @@ ns=Diagnostics/Quicklook description=Finish latitude, longitude of the cross section. help=The latitude, longitude coordinate in the model coordinate system where the cross section will finish i.e. the furthest right hand point of the plot, - where the x axis is distance along transect, and y axis is pressure level). + where the x axis is distance along transect, and y axis is pressure level. type=real,real compulsory=true sort-key=2modellevel6 From b565a04b6d30d11e263ae7a688e739e01fad2e96 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 03:33:49 +0100 Subject: [PATCH 84/90] Clarify model name should be reasonably short --- cset-workflow/meta/rose-meta.conf | 44 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index a33a39ca4..87f1a3418 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -443,10 +443,10 @@ trigger=template variables=m01_name: this >= 1; # [template variables=m??_name] # ns=Models and Cases/Model ?? # title=Model name -# description=A friendly name for the model. +# description=A concise, friendly name for the model. # help=A recognisable name for this particular model. This is carried through to # the output webpage, and identifies the model. As it is only used for -# display purposes it can be any string. +# display purposes it can be any short string. 
# type=quoted # compulsory=true # sort-key=a0 @@ -551,10 +551,10 @@ trigger=template variables=m01_name: this >= 1; [template variables=m01_name] ns=Models and Cases/Model 01 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -657,10 +657,10 @@ sort-key=c2 [template variables=m02_name] ns=Models and Cases/Model 02 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -763,10 +763,10 @@ sort-key=c2 [template variables=m03_name] ns=Models and Cases/Model 03 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -869,10 +869,10 @@ sort-key=c2 [template variables=m04_name] ns=Models and Cases/Model 04 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -975,10 +975,10 @@ sort-key=c2 [template variables=m05_name] ns=Models and Cases/Model 05 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -1081,10 +1081,10 @@ sort-key=c2 [template variables=m06_name] ns=Models and Cases/Model 06 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -1187,10 +1187,10 @@ sort-key=c2 [template variables=m07_name] ns=Models and Cases/Model 07 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. 
type=quoted compulsory=true sort-key=a0 @@ -1293,10 +1293,10 @@ sort-key=c2 [template variables=m08_name] ns=Models and Cases/Model 08 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -1399,10 +1399,10 @@ sort-key=c2 [template variables=m09_name] ns=Models and Cases/Model 09 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 @@ -1505,10 +1505,10 @@ sort-key=c2 [template variables=m10_name] ns=Models and Cases/Model 10 title=Model name -description=A friendly name for the model. +description=A concise, friendly name for the model. help=A recognisable name for this particular model. This is carried through to the output webpage, and identifies the model. As it is only used for - display purposes it can be any string. + display purposes it can be any short string. type=quoted compulsory=true sort-key=a0 From f50ddfe361139f5bfd076642dbbc6e7e4ef9fc67 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 04:09:32 +0100 Subject: [PATCH 85/90] Bundle recipes and example data into documentation This prevents needing to rely on external hosts for these. I did need to drop the example commands to fetch the resources via curl, but they were probably confusing anyway. --- docs/source/getting-started/air_temp.nc | Bin 0 -> 29314 bytes .../air_temperature_spatial_plot.yaml | 39 ++++++++++++++++++ .../getting-started/create-first-recipe.rst | 10 ++--- docs/source/getting-started/run-recipe.rst | 17 ++------ .../getting-started/visualise-recipe.rst | 5 +-- 5 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 docs/source/getting-started/air_temp.nc create mode 100644 docs/source/getting-started/air_temperature_spatial_plot.yaml diff --git a/docs/source/getting-started/air_temp.nc b/docs/source/getting-started/air_temp.nc new file mode 100644 index 0000000000000000000000000000000000000000..bcdc694f64fbf766d50942390306a74e2461267f GIT binary patch literal 29314 zcmeI54}4YCmGAdW0z$kZViZa#y~e1NVhorfB0?Y`1jK-lqNPX(+(44%-z0%jWE=(^ zWhi4QQXW-?p~^50MT$P2Whhd{;aQ3dkKyBU7}`=~7(`1gRX!FNr=9Qb?7eU9y-6gg z*w3f)$d!PRCtV#Jr_t;eb!Not{JrCbzOc#@CdYW!ZXnsv; zV^djUMQLN*{DwK@Mg>g=DWE22aAVHxl|ybH8XjRxSCp=@O7j{jqop-v_4Spt^8yX! 
zNE!`wO=V5x(bDR&rpl)I(el#jy4rccoiTsEtoK#QRya$tspiU91{RbzG*;Ht1~IAZ zifQoR>^n__o0iuto@HzKXJ7hdR^gPQ$wVjBpaHCeucGS|A=fTPg9;WyiN#&r|mL{8pW&lB(r;~TtCpS3(Plqi3c|&r#7rtzT(u20w z{kT7w{8-`Csbfu_G4@d?V$5;H9Qskn+^NL-f2Zb1&`qu~p-=`4lM)JrR1jUBre-KV z*F;7I?PE-y$-UC#{w@_O?4c*A1DiYK`f=7~Gp?|4EV0D+K9SzS>U|T3M@ux-WSB58 zrN!kHpCR9reWfAqw$mI;5&ieFdAxV!ZtU{XU5sU5gnmk%ONBjO(9k4*Ud6;#$# zMiX;p^~_yAef!VoiKDX2p;#T~>RC!^C`9E2mrP4ntgdY^Fu4T2YlF|mDxh~a)0_BB zmvq|(wqnjMw)Ov;E;gdixVrK6->vQXA1{p#)xR%JxzjtwI42?j5K=CNHRvl(rNbqCF4em3A&dClo(=LN~fUYOyC zjnMVO<`T#-6+s1Wt43q)E&ayN1C#i>FWCq}i)pu@5TU$~*%PGI|EIru+lp?NTm+xxx8RLJ|MeIhr7~jkL^gQ+Q z{w?D9`);xk>)ox(^L;#5T6T=@;|VA6eLQ|h@$q>19&O0WGIV+Q@8532{&np6J{~`G zdA?6K7(ZKyM;%O8vb;K)F3A8>{QF%!UvJqlzE4*jU6+^l7U>7@WWELnU7qjL z4d!NXd>_xJ1L>CXbk*SU@%VgxCW-Ik@1Ml?@}EuOKSUqb6$$>fO!tN)zKD!691NaEyI~(tbTkvHz&z^zMs_jdhq-KN%B5^2B_dlmLJ##pAjlS-q+`#B)(7g z)+D}{&rag|dUI^E!FM#z=%k%B&{+eWHPBfDoi)%|1D!R{Sp%Il&{+e2@EXuwLpaBn zD?VyJMT=go>+SZ2+ru`)gUq(%x7?3uRsYw0Bai>UZ4zpliX0-^e$$V8|2sW-{U8zk+@p3JO-_n_Oi3 zg0A3Z@QTx!1=al-bbpF&E*OgLW9S`#A3-+k^mY-Rz_ZoR*TFmBJUAnuzk_tPa(@Jz z1L^1;gr4BpLG-4Bjo>_K>;)w}JB!XX=qAa5qu^ce2G{~}z-eS>5l4S$Kj=PqFM?f! zQ^0fFuLlM!B>pH+{MpD(0m}(*=h?41hNX7{W`5`!TSd`z$A`Hd_Iz zz)|G;^XxR3PWWMD(?Jp8lVAe8BhU!6091f6pdaW1RM&Zw=PBp{idC_^9U>(GthBK$YVn`}*W zGlMd{vYa&OI|HegJY>}e(j1je3YZDhFVcbPezEk?eHA^~eNW`o=Q4oo`~%|J43&=Z z@Bw%aWPnWcBqO~Pm(Bt1Wd|F0wjA9Z(Bq_W0z}~#5Z(e+oa(E`fZ|gevP0!deWRN4 zO^2QVd*ST_YWF;#{`3-1oi8Ik^|>kVvj`W12EvlxAsxaQJX4#`f-Z#D4Xh?_vk7PN zybyYb=juC8fHmMnc)2|51y!Bx=YA7V|0#f~{)^#H1Is{HFchBbOnrO^Va0cv@CtNx zfwkOEfhQZz11o^^*C2lo$ZjeKA4PTx?J<(D^u?1sOGkZe7Em3lk0@P@7yT*Eb5Plo zWYj(?dp1f*nc+yo})Pre2?NkAktn}2r2f#6K4x9w)S9`%`(oug_Uy{ya;1w_vNJdx> z)K8B9$%RR8Ayj3N4d{N2yIK?7M$ggCvi;Y=d0@~{9BSj~Jd1!r;#Qy2xTN>Uad@vn_kdI2C{X+g zPX^Zm*<2p-&!MNWRq-DL>T{W3GxuA-2Jw(xt~5XN zL>jVD#Vz@>+~+_gI~>dcD$9A~qu>PcwaCc+R3|rsjogc>??ga9pfoojKM`z4XBje^ zfX0YSo^6FHpXVJUpCLWMYDc|8_M)eLrapZNs4jK`@gtTZ-MGsaTJ3EKsGlUECz~0eQr6>`%`7s^XGtUWHOilW`c%q1-na6i8P@);@iIkU8GZfSi(Wlec$O*xJ(m1VW9x-aOHFy#m?PhHT23+?yQ zt8CQeWerUgr441#%K42Qi7W4|dmk7bo?^d+e#*ubl(x1`5!TmL<6+~MDXqC!w3){} zWAegN?N{QyZNwY%-04unIdg-E~J(XX6esc2kD6w11CtI7xMt9-|#{ zIPun&ZjQM|te5$6t#Q=b>G$0yD*r{$d|3+KdSl!`w*!By%SlHR_|)gJ*0z@YBH#A& z7Ei)_p!7Yk#X~XVFX2CGw<>VinXk?r7ktDFf1%%KMc&iV4Crs>*YDXoX6&AoSNGks zbH~qqSTg1DAMW_xKkYvK_P0jv`p2)1Djaus)W1&4$m_m*RNmb$SLF>mzdY~oH#g)} zeg37qwEz2B-ujtu<`LzrjN2CEnI-=+=*>H!=GhC+7bx8H>Y_;s7d|;|y28C@Wt1!Y z_MkWG6yAB^`9%t^cy-ZVDE#1)<5nts+pLVI6*jlN`L_xmzVQ4u(fI4^C>P2T^<^{W z{3uhwgg}!9Ef(~$-vmBnfUN~uER+z|VnK`ZRbU1$u>xe17wCE!Re*zzYO(MHn1!xi zEM%hB18A|Z8#zr1HbY+rXMiRs%1d9M#lm)|CLdZXNaqmunz$U}*&?9D!Uv?G394Sy zPY~ARKyjXQknRb`(@XavvU|Bfcic zdB~_7i^02uSCSts7S?lL0#B0xO?p;?YH$?!empw^q@ziE3MeG3{Hx8>-g(f8Kw;f0 z9!>N(Iiwa=rQnAM!oDciO2#EfLbsGsDAxop$Zwj@YCRHlB0=*bTkoB86-al zsP46>nN1p-dA1E`a-lq|g`NOv580R|O42QYw-HPOeu1t!ke=H2AUFB=Zv30?vXXK+nR+FNCfHnz-nGjk{m$!V92^>i|$0B&)dUK>?87 z6c7Pgpr0j9^`Y0H8-dDqn7oznTy<4VT@0YVEd)8#{}^P|2eeoaq@((g{Tv3Wm+R48 zL>z8@`22j6IUzJ=0tmWCG=)MF!K^mHv$)=|% zE$A!YG&l}aHpMXw+$@TGE%I6{ya!Zf<@pWnWzXtQyMV?X*_rxS1^k=QnFbaR)^q8p zUA0&k1T;Zj4o`X40(}bhH4HZAxA?oCj8e>EH6!3rgo5umE0vPzcnY zs=z>?v8}6syf#9$uvvk85j@GuCWk=P*6OR$Q=ZQwuewmbV`3akoRxps(MH0GTYX2g z0Az!$$S(j|EUZIDHvcZj(lh8uARR&SN^=Cf<)8u_Mz0#GI#8c`0vrO$<9Q(cUEmF{ zgLK}5s((pW{Y0`+umkJ{>bvT%Y8Qpo=T<}4fXQGXkj>2n>T^p$6HtEC=am0vz;j>; zC;=0|OrSc6d++&ox~F=#>$$(iluRsSmcLz7E#_bH_Sw1dV#FMffSa;B{_>bR%>82E zrJyv2NeO+oqVMl5lgU(y#yda`PQAP)UW9hetkXrb>m~{=i4Ca{UYD zCt~1HC}+Ft&;9ZTCh#5`O^9pGKOcJgz~K?k+EC};PoxHd_V1oh?+Hzv(5eY7nqVS{ zO&f0-38t0M;6EC2?OmTpnK~nFe+}5>-s!mIo_8Ge)SoYCuW!3|$$!{oqIm)6mvYJU 
zGMp{YIARxisqH^o5XSdW{^H3_*mA+u0x~#e zc+y82D{JSJM+V<{N6xJ|gZV#_lQTT{UuA9M^ntbue4V87jts=+lvS7OltS?JTJKFs zU=H)@8rVp?oMl$Tx;#xT^UL-!sLtH~!@p>|5~Dac@D*S#`x|hc-R! zDX;D$Y#hmN!Y;8*%}?M>s3gM^?Z$iik6!70r7MAUvpjpEV{h^8^MjK>{*Y&g{mQqj z(%*;7{lO@4<&1{n=0itw+XfSDIsPuz>FjThtP}bbKhVnUOjZLNa^HkPF zjk$T%Km40*NzO|KX)9BH{?d{2k*_%~pL<=-{TwE>?UoA$!&b`H&euQ~eI1N!^JSp( zGb&E3)D7ix%NxphtCR-sOn)xP<#_DF^FN<_MD~I^!_|BU^`V=t{WGioxUBa|V3nt85MQzxwx6x-Fzkk=qZ4f_DJ)M-1b~H(F^rw|3k&&w^Yjg_lBP(4C zRY_KqSI(USHQ(*;Ki}o)nanFuy)_IFwM__3IbT-4S@CjkIZA;okbqr@}Ll zRul!yOCoa{>S`kQmN!Kv&7E7x%;Af*m2)f0qmg^-qUF_U1RVzq+c@&;Y8RB(HgUq+ zPB4A0^9yesoPCGZc8a#OE9b|neiGl>DelzO3};Q@Np=u-3C)Q+?JMUwoz+p)F`KkK zF%k6dvLb?3WQocTmQ2ACghKa)8zJYgDX`PT>gBZd)g*JJZ24cw2x@iA8i#v zX6MiE{8domE|R+vc631182j|<*sxlr~ZY$x`?1tAE;_t$)iV6-zpn z1*<|KbhbW1$O`7V_wJ4Exu;zs|8ZBaWzprIhy9%~PxJT&GrM3?cXI=ZpE89(11V0;)Wlm;Yan=ln zn`J(~?I-_bo(yj9``llD3;oXPa@Ii5{$2gV%r&O#1;({_>_e(;^ZS z@3Gv!_OW%+xEioCig!YxTru@?4SqE773v$MExw-;UV2N*&oa83uTHXA_>ak!9?i4V z&rhCp^OMuw)(RN`OfuIl46J5{{@Pzt=#a|C6xM#1ufTRiWhd_K5={`YzowADN($so zdhFR??zBRm3NGg!haW{e^?Mw?;8le-D?U87GBLb%#f2A@Guy5c{=_~Vrl#jojo2Tg zxWr7RmEV`p{I10YpyE|l?lC4O^XVJ7`|=0x{qJBuFMjIrUk|?b>rk36)743_`!a^o z{Gi#l?VlBzc_WE%h0^?xnx^H3Kh+;f^X7h4!fvTQ%^gbfV{MN_-z0ymJCx=}->Z|R zbAEd%l;#KI-Z6>9d8J>wgwp(2-PO)C`!LtGnfD_?S7p95#~(rurTO9fnxsl9&GAhi zO7nwy+U(kBqdy-XO7nyJl{U^&J^f0tPtlL{*TgbyXQ`z*7asI7e!RNAg-j4x*HSBZ zslRqW+z_{W;my@kKj5`*4eSTK9_Bi?V)Un8dN`Z%gWh%b*}VC}nm)2nKhyPGD}AX_ z#67n=`EiU(-G=E3zN{Z1Yq#(}LLm9krJc5J>44ML`23oGv~4x)(rtA^D=(+uH_%ek zHQ1z@`)1kPKI1a&TkRuuml)Bd>4ud{|M*gPx?QU#e;~;t8dY~mMP*}z|H~pAn#wOI zjKp@(BeUl>MQZDsnjbAFV}E*fb$NEgUqd?fgFSYzDm^g!jo6k#7I`o!?9cnQZ;N)! z{=Id7{Wis~>A$^v4X4B`+*`V*;jxnJ(3h0tEUSm9sBQ~!vLZC5$`UPSo39U#O?>{N zf0&KmZwfB|gHC?27^5ySKf*T0-TECFv(3O`8?}CzZ9tF*^FW&a2?<&fb%9YNgcT-TjUc0;Bmb{}j zw%nGyc`M-g%_o0kd3P0EJ5P5#oX174B+mXi!<>`I)itJyp!Y+ zv+tpmYvE<3anfRXLem!^x8F@uUfxN1+2p0o+)k%H_ z*h5%#B#*h_&|-M2o~j4(7+Zt9yr<+IvjMCC{XF$ghLSLn%lX{xqnE*C}4PZSu4t4w)T}3Oo$< zf@grzTM8Zk!+`We<;kn#O=l>B`oezbN^p^9yLqPLO??T^q)a>MbHk{69d86WoydkNiY|_jyEa3E#L@HpW6y#dlNw#kau4m`I~{xTCf-1POyjj zvpnc7gjbkrVQ!Mo@fFa)0JdNsWG<4xkpCI^8iP#mj~uLWl$ zOIUqq2z57*uMi0MzH?d3gd% z=h+iL^?oy0#eEj^AXNG63zSzKZz=*8Nk_K25uA21YoRB=d2q_{WFOhwt4)RjjUyFM z#V^@fup1l$>T~;n>grXnfppZLHi5Z7{o)lc6Kr>Q3i<{(0@OE^_9CFN6a$R|Yrq7c zv=##Oz1cwW>w#nycLOK@Bf%7q3I2y(?VFwX-&TyxtzvU5TP=Q7d#OrleRlT#EU)*c z{}g%DZpZt1Ve+iYOD6x+?NK+svNIuLJiX_ei(hsgq`%Y*>K6Ok#(@XvK}GgyJ7H|y zgY;FWqSSrhYtycGbC&uruI1v+GY+VE{pMz(z&BiuzRl8h%FV>sNw(ZfU zgMD|uMH)zr*;KIcV_RScHt1r0dNDs*y{J_dyC;nux66}Oh98ToHBZ`>z2VA)go6F* z%lTm@xjp=DJgEkkZyz6yRa&!qU^_jpR7Gukp3ywaS7tk1o-St{Xd0x;(@WKX|FB@S znr+sH{l_aG?xY{D1`_Aeeq`{}$DKTv{*h}=rpCeL>7O5Zey*EKf1n4vhtw^DiE*PM zEwBftx<9q@5%ZcPmMF zmjv$-4XUq|{^^fR@VTklj>dDI{N~&CRM2=HbLlkJ`tOA|+fe7GLTwCls`(lyJ-6EGsKau=I z|0NQQYZ`nsdR^XsCS;I{*?UReniS3^Y|OW>9cO~iR4e{a{v;}`F8>PmVgE|@;cG|F z>z(M%K^=YVs?SaerS-Bgm=>N2gMB1Upp*P@*Fezz-rX!HO8564AJ?H}g*uq7c2>npy%Ox+?()9d(fBq@9Y;SMc7+MI^V(MCf>l8a6&Q+{8@dr~>!XHN^Y>z`kX zxromxude1-shTS4+@e3zrES+!!y`4Uj%w!DLHcpiMzV;qH)42P;0piPFWB>pF literal 0 HcmV?d00001 diff --git a/docs/source/getting-started/air_temperature_spatial_plot.yaml b/docs/source/getting-started/air_temperature_spatial_plot.yaml new file mode 100644 index 000000000..410682fed --- /dev/null +++ b/docs/source/getting-started/air_temperature_spatial_plot.yaml @@ -0,0 +1,39 @@ +# Name of the recipe. +title: Surface air temperature spatial plot +# Category of recipe, used to group together multiple recipes in output. +category: Quick look +# Description will be displayed alongside output. 
+description: | + Extended description that can go across multiple lines. It is written in + [Markdown](https://commonmark.org/help/) and can thus contain links and + _formatting_. + +# Sequence of steps to run. +steps: + # Specify the operator to run in each step. + - operator: read.read_cubes + # Specify the name of the argument, and its value. + filename_pattern: "*.nc" + + - operator: filters.filter_cubes + # Can specify extra keyword arguments as sub-maps. + constraint: + operator: constraints.combine_constraints + var_constraint: + # Can nest in another operator to use its output as an argument, + # multiple levels deep if needed. + operator: constraints.generate_var_constraint + # Input implicitly taken from the previous step, but can be overridden + # by using the appropriate keyword argument. + varname: m01s03i236 + cell_method_constraint: + operator: constraints.generate_cell_methods_constraint + # Values can be more than just strings, such as this empty list. + cell_methods: [] + + # Save a sequence of plots, one per time. + - operator: plot.spatial_pcolormesh_plot + + # Save a single cube with all the processed data. + - operator: write.write_cube_to_nc + overwrite: True diff --git a/docs/source/getting-started/create-first-recipe.rst b/docs/source/getting-started/create-first-recipe.rst index 999fdb313..406bf783d 100644 --- a/docs/source/getting-started/create-first-recipe.rst +++ b/docs/source/getting-started/create-first-recipe.rst @@ -32,7 +32,7 @@ We will now create a recipe from scratch. This recipe will plot a specific timestamp of the example air temperature data. Start by opening a new file in your favourite text editor, and save it somewhere -accessible as ``single_timestep_surface_air_temperature_spatial_plot.yaml``. +accessible as ``surface_air_temperature_spatial_plot.yaml``. Recipe Metadata --------------- @@ -179,16 +179,14 @@ After following this far your recipe should look like this: Running the Recipe ------------------ -We can run this recipe using `the same data`_ as was used for the -:doc:`run-recipe` tutorial. - -.. _the same data: https://github.com/MetOffice/CSET/raw/main/tests/test_data/air_temp.nc +We can run this recipe using the same data as was used for the +:doc:`run-recipe` tutorial: :download:`air_temp.nc` Use ``cset bake`` to run your newly created recipe. .. code-block:: bash - cset bake -i air_temp.nc -o output/ -r single_timestep_surface_air_temperature_spatial_plot.yaml + cset bake -i air_temp.nc -o output/ -r surface_air_temperature_spatial_plot.yaml You can investigate the created plot and data file in the specified ``output`` directory. diff --git a/docs/source/getting-started/run-recipe.rst b/docs/source/getting-started/run-recipe.rst index a9be49d1a..b859fc8a9 100644 --- a/docs/source/getting-started/run-recipe.rst +++ b/docs/source/getting-started/run-recipe.rst @@ -9,17 +9,11 @@ this tutorial you will use CSET to plot the mean surface air temperature of a forecast. We will create a basic spatial plot of the mean surface air temperature, using a -pre-existing recipe. You can `download the recipe file here`_. +pre-existing recipe. Download the following recipe: +:download:`air_temperature_spatial_plot.yaml` -Now you need to find some data to process. You can `download an example file -here`_, or with the following command. - -.. code-block:: bash - - # Download recipe file. - curl -LO https://gist.githubusercontent.com/jfrost-mo/6e539d5be20bfa28342bf4ff82f24dea/raw/air_temperature_spatial_plot.yaml - # Download example data. 
- curl -LO https://github.com/MetOffice/CSET/raw/main/tests/test_data/air_temp.nc +Now you need to find some data to process. Download the following example file: +:download:`air_temp.nc` Now we are ready to run our recipe. This is where we use the ``cset bake`` command. This takes the input data file, an output path and the recipe file. The @@ -34,6 +28,3 @@ You can look at the visualised output with ``xdg-open output/index.html``. You've now successfully run CSET with a pre-existing recipe. In the next tutorial we will see what is going on inside. - -.. _download the recipe file here: https://gist.githubusercontent.com/jfrost-mo/6e539d5be20bfa28342bf4ff82f24dea/raw/air_temperature_spatial_plot.yaml -.. _download an example file here: https://github.com/MetOffice/CSET/raw/main/tests/test_data/air_temp.nc diff --git a/docs/source/getting-started/visualise-recipe.rst b/docs/source/getting-started/visualise-recipe.rst index 6e5f0d835..fc19a266b 100644 --- a/docs/source/getting-started/visualise-recipe.rst +++ b/docs/source/getting-started/visualise-recipe.rst @@ -6,7 +6,8 @@ Visualising a recipe graphically In this tutorial we will investigate what is going on inside of a recipe, and visualise the *operators* inside. -As in the previous tutorial `download this example recipe file`_. +As in the previous tutorial we can download this example recipe file: +:download:`air_temperature_spatial_plot.yaml` We will now visualise the steps inside the recipe using the ``cset graph`` command. @@ -52,5 +53,3 @@ Afterwards the cube passes to the ``plot.spatial_contour_plot`` and You now know how to visualise a recipe, and a little about the operators it is made up of. In the next tutorial you will learn to make your own recipe. - -.. _download this example recipe file: https://gist.githubusercontent.com/jfrost-mo/6e539d5be20bfa28342bf4ff82f24dea/raw/air_temperature_spatial_plot.yaml From d2d305be38dce8333ac6c6672b902bd7de6cbaf6 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 04:19:59 +0100 Subject: [PATCH 86/90] Remove preprocessing settings They are not implemented yet, and can be re-added when they are. 
--- cset-workflow/meta/rose-meta.conf | 231 +----------------------------- 1 file changed, 1 insertion(+), 230 deletions(-) diff --git a/cset-workflow/meta/rose-meta.conf b/cset-workflow/meta/rose-meta.conf index 87f1a3418..8d2c2a6b3 100644 --- a/cset-workflow/meta/rose-meta.conf +++ b/cset-workflow/meta/rose-meta.conf @@ -360,80 +360,60 @@ trigger=template variables=m01_name: this >= 1; template variables=m01_data_path: this >= 1; template variables=m01_date_type: this >= 1; template variables=m01_data_period: this >= 1; - template variables=m01_preprocessing: this >= 1; - template variables=m01_preprocessing_recipe: this >= 1; template variables=m02_name: this >= 2; template variables=m02_data_source: this >= 2; template variables=m02_data_path: this >= 2; template variables=m02_date_type: this >= 2; template variables=m02_data_period: this >= 2; - template variables=m02_preprocessing: this >= 2; - template variables=m02_preprocessing_recipe: this >= 2; template variables=m03_name: this >= 3; template variables=m03_data_source: this >= 3; template variables=m03_data_path: this >= 3; template variables=m03_date_type: this >= 3; template variables=m03_data_period: this >= 3; - template variables=m03_preprocessing: this >= 3; - template variables=m03_preprocessing_recipe: this >= 3; template variables=m04_name: this >= 4; template variables=m04_data_source: this >= 4; template variables=m04_data_path: this >= 4; template variables=m04_date_type: this >= 4; template variables=m04_data_period: this >= 4; - template variables=m04_preprocessing: this >= 4; - template variables=m04_preprocessing_recipe: this >= 4; template variables=m05_name: this >= 5; template variables=m05_data_source: this >= 5; template variables=m05_data_path: this >= 5; template variables=m05_date_type: this >= 5; template variables=m05_data_period: this >= 5; - template variables=m05_preprocessing: this >= 5; - template variables=m05_preprocessing_recipe: this >= 5; template variables=m06_name: this >= 6; template variables=m06_data_source: this >= 6; template variables=m06_data_path: this >= 6; template variables=m06_date_type: this >= 6; template variables=m06_data_period: this >= 6; - template variables=m06_preprocessing: this >= 6; - template variables=m06_preprocessing_recipe: this >= 6; template variables=m07_name: this >= 7; template variables=m07_data_source: this >= 7; template variables=m07_data_path: this >= 7; template variables=m07_date_type: this >= 7; template variables=m07_data_period: this >= 7; - template variables=m07_preprocessing: this >= 7; - template variables=m07_preprocessing_recipe: this >= 7; template variables=m08_name: this >= 8; template variables=m08_data_source: this >= 8; template variables=m08_data_path: this >= 8; template variables=m08_date_type: this >= 8; template variables=m08_data_period: this >= 8; - template variables=m08_preprocessing: this >= 8; - template variables=m08_preprocessing_recipe: this >= 8; template variables=m09_name: this >= 9; template variables=m09_data_source: this >= 9; template variables=m09_data_path: this >= 9; template variables=m09_date_type: this >= 9; template variables=m09_data_period: this >= 9; - template variables=m09_preprocessing: this >= 9; - template variables=m09_preprocessing_recipe: this >= 9; template variables=m10_name: this >= 10; template variables=m10_data_source: this >= 10; template variables=m10_data_path: this >= 10; template variables=m10_date_type: this >= 10; template variables=m10_data_period: this >= 10; - template 
variables=m10_preprocessing: this >= 10; - template variables=m10_preprocessing_recipe: this >= 10; ################################################################################ @@ -525,6 +505,7 @@ trigger=template variables=m01_name: this >= 1; # compulsory=true # sort-key=b2 +# TODO: Include this in https://github.com/MetOffice/CSET/issues/835 # [template variables=m??_preprocessing] # ns=Models and Cases/Model ?? # title=Preprocess @@ -631,27 +612,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m01_preprocessing] -ns=Models and Cases/Model 01 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m01_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m01_preprocessing_recipe] -ns=Models and Cases/Model 01 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 02 [template variables=m02_name] @@ -737,27 +697,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m02_preprocessing] -ns=Models and Cases/Model 02 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m02_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m02_preprocessing_recipe] -ns=Models and Cases/Model 02 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 03 [template variables=m03_name] @@ -843,27 +782,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m03_preprocessing] -ns=Models and Cases/Model 03 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m03_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m03_preprocessing_recipe] -ns=Models and Cases/Model 03 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 04 [template variables=m04_name] @@ -949,27 +867,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m04_preprocessing] -ns=Models and Cases/Model 04 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. 
-type=python_boolean -compulsory=true -trigger=template variables=m04_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m04_preprocessing_recipe] -ns=Models and Cases/Model 04 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 05 [template variables=m05_name] @@ -1055,27 +952,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m05_preprocessing] -ns=Models and Cases/Model 05 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m05_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m05_preprocessing_recipe] -ns=Models and Cases/Model 05 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 06 [template variables=m06_name] @@ -1161,27 +1037,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m06_preprocessing] -ns=Models and Cases/Model 06 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m06_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m06_preprocessing_recipe] -ns=Models and Cases/Model 06 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 07 [template variables=m07_name] @@ -1267,27 +1122,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m07_preprocessing] -ns=Models and Cases/Model 07 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m07_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m07_preprocessing_recipe] -ns=Models and Cases/Model 07 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 08 [template variables=m08_name] @@ -1373,27 +1207,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m08_preprocessing] -ns=Models and Cases/Model 08 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. 
-type=python_boolean -compulsory=true -trigger=template variables=m08_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m08_preprocessing_recipe] -ns=Models and Cases/Model 08 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 09 [template variables=m09_name] @@ -1479,27 +1292,6 @@ type=quoted compulsory=true sort-key=b2 -[template variables=m09_preprocessing] -ns=Models and Cases/Model 09 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m09_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m09_preprocessing_recipe] -ns=Models and Cases/Model 09 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 - # Model 10 [template variables=m10_name] @@ -1584,24 +1376,3 @@ help=The period of the input data in each file as an ISO 8601 duration. This type=quoted compulsory=true sort-key=b2 - -[template variables=m10_preprocessing] -ns=Models and Cases/Model 10 -title=Preprocess -description=Preprocess all of the model data. NOTE: Not yet implemented. -help=Whether to preprocess all of the model data. This is useful for applying - any necessary transformations to the data before it is used in the workflow, - such as removing boundary regions. -type=python_boolean -compulsory=true -trigger=template variables=m10_preprocessing_recipe: True; -sort-key=c1 - -[template variables=m10_preprocessing_recipe] -ns=Models and Cases/Model 10 -title=Preprocessing recipe -description=The preprocessing recipe to use. NOTE: Not yet implemented. -help=The preprocessing recipe to use. -type=quoted -compulsory=true -sort-key=c2 From 7d94c590c060a675cee29bcbaa4c2bb46b0d0f4c Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 04:28:47 +0100 Subject: [PATCH 87/90] Set ROSE_DATAC in data_directory test --- tests/workflow_utils/test_run_cset_recipe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/workflow_utils/test_run_cset_recipe.py b/tests/workflow_utils/test_run_cset_recipe.py index 7519123df..9b3b1829b 100644 --- a/tests/workflow_utils/test_run_cset_recipe.py +++ b/tests/workflow_utils/test_run_cset_recipe.py @@ -84,8 +84,7 @@ def mock_recipe_id(): def test_data_directory(monkeypatch): """Data directory correctly interpreted.""" - monkeypatch.setenv("CYLC_WORKFLOW_SHARE_DIR", "/share") - monkeypatch.setenv("CYLC_TASK_CYCLE_POINT", "20000101T0000Z") + monkeypatch.setenv("ROSE_DATAC", "/share/cycle/20000101T0000Z") monkeypatch.setenv("MODEL_NUMBER", "1") expected = "/share/cycle/20000101T0000Z/data/1" actual = run_cset_recipe.data_directory() From a1bc3ce65241354128d8c4c61aefe3ca52ca6597 Mon Sep 17 00:00:00 2001 From: James Frost Date: Tue, 17 Sep 2024 07:17:27 +0100 Subject: [PATCH 88/90] Make area constraint much stricter, and handle date line The operator now requires all arguments to either be numbers, or all be None if you don't want to constrain the area. 
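For example, under the new contract (a minimal sketch, assuming CSET is
importable; it only uses the operator changed in the diff below):

    from CSET.operators.constraints import generate_area_constraint

    # All bounds given as numbers: constrain to the area. Here the
    # longitude bounds wrap across the date line (350 to 10 degrees).
    area = generate_area_constraint(-5.0, 5.0, 350.0, 10.0)

    # All bounds None: an empty constraint that selects everything.
    everywhere = generate_area_constraint(None, None, None, None)

    # Mixing numbers and None is rejected.
    try:
        generate_area_constraint(None, None, None, 0)
    except TypeError:
        pass  # Raised, as mixed arguments are invalid.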
The test has been mildly improved: it now at least checks for both of the
named functions on the coordinates, though it still doesn't test that they
do anything.
---
 src/CSET/operators/constraints.py   | 57 ++++++++++++++++++++++-------
 tests/operators/test_constraints.py | 12 +++++-
 2 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/src/CSET/operators/constraints.py b/src/CSET/operators/constraints.py
index 5c7cacb3b..fa8e80dda 100644
--- a/src/CSET/operators/constraints.py
+++ b/src/CSET/operators/constraints.py
@@ -14,6 +14,7 @@
 
 """Operators to generate constraints to filter with."""
 
+import numbers
 import re
 from collections.abc import Iterable
 from datetime import datetime
@@ -177,10 +178,10 @@ def generate_time_constraint(
 
 
 def generate_area_constraint(
-    lat_start: float | str,
-    lat_end: float | str,
-    lon_start: float | str,
-    lon_end: float | str,
+    lat_start: float | None,
+    lat_end: float | None,
+    lon_start: float | None,
+    lon_end: float | None,
     **kwargs,
 ) -> iris.Constraint:
     """Generate an area constraint between latitude/longitude limits.
@@ -189,29 +190,59 @@ def generate_area_constraint(
     constraint that selects grid values only inside that area. Works with the
     data's native grid so is defined within the rotated pole CRS.
 
+    Alternatively, all arguments may be None to indicate the area should not
+    be constrained. This is useful for making subsetting an optional step in
+    a processing pipeline.
+
     Arguments
     ---------
-    lat_start: float
+    lat_start: float | None
         Latitude value for lower bound
-    lat_end: float
+    lat_end: float | None
         Latitude value for top bound
-    lon_start: float
+    lon_start: float | None
         Longitude value for left bound
-    lon_end: float
+    lon_end: float | None
         Longitude value for right bound
 
     Returns
     -------
     area_constraint: iris.Constraint
     """
+    # Check all arguments are defined, or all are None.
+    if not (
+        all(
+            (
+                isinstance(lat_start, numbers.Real),
+                isinstance(lat_end, numbers.Real),
+                isinstance(lon_start, numbers.Real),
+                isinstance(lon_end, numbers.Real),
+            )
+        )
+        or all((lat_start is None, lat_end is None, lon_start is None, lon_end is None))
+    ):
+        raise TypeError("Bounds must be real numbers, or all None.")
+
+    # Don't constrain area if all arguments are None.
-    if lat_start is None:
+    if lat_start is None:  # Only need to check once, as they will be the same.
+        # An empty constraint allows everything.
         return iris.Constraint()
 
+    # Handle bounds crossing the date line.
+    if lon_end < lon_start:
+        lon_end = lon_end + 360
+
+    def bound_lat(cell: iris.coords.Cell) -> bool:
+        return lat_start < cell < lat_end
+
+    def bound_lon(cell: iris.coords.Cell) -> bool:
+        # Adjust cell values to handle crossing the date line.
+        if cell < lon_start:
+            cell = cell + 360
+        return lon_start < cell < lon_end
+
     area_constraint = iris.Constraint(
-        coord_values={
-            "grid_latitude": lambda cell: lat_start < cell < lat_end,
-            "grid_longitude": lambda cell: lon_start < cell < lon_end,
-        }
+        coord_values={"grid_latitude": bound_lat, "grid_longitude": bound_lon}
     )
     return area_constraint
 
diff --git a/tests/operators/test_constraints.py b/tests/operators/test_constraints.py
index 77d300a0d..b2bca8c91 100644
--- a/tests/operators/test_constraints.py
+++ b/tests/operators/test_constraints.py
@@ -114,8 +114,16 @@ def test_generate_level_constraint_no_pressure():
 def test_generate_area_constraint():
     """Generate area constraint with lat-lon limits."""
     area_constraint = constraints.generate_area_constraint(0.0, 0.0, 0.1, 0.1)
-    expected_area_constraint = "Constraint(coord_values={'grid_latitude': <function generate_area_constraint.<locals>.<lambda> at"
-    assert expected_area_constraint in repr(area_constraint)
+    actual = repr(area_constraint)
+    assert "Constraint(coord_values={" in actual
+    assert (
+        "'grid_latitude': <function generate_area_constraint.<locals>.bound_lat at 0x"
+        in actual
+    )
+    assert (
+        "'grid_longitude': <function generate_area_constraint.<locals>.bound_lon at 0x"
+        in actual
+    )
 
 
 def test_generate_area_constraint_no_limits():

From 638f0185de052b83e207d96436bff00aa0d37c75 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 17 Sep 2024 07:26:52 +0100
Subject: [PATCH 89/90] Test invalid arguments to area constraint

---
 tests/operators/test_constraints.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/operators/test_constraints.py b/tests/operators/test_constraints.py
index b2bca8c91..b1b2014cd 100644
--- a/tests/operators/test_constraints.py
+++ b/tests/operators/test_constraints.py
@@ -16,6 +16,8 @@
 
 from datetime import datetime
 
+import pytest
+
 from CSET.operators import constraints
 
 
@@ -133,6 +135,17 @@ def test_generate_area_constraint_no_limits():
     assert expected_area_constraint in repr(area_constraint)
 
 
+def test_generate_area_constraint_invalid_arguments():
+    """Generate area constraint raises exception with invalid arguments."""
+    # Non-numbers are rejected.
+    with pytest.raises(TypeError):
+        constraints.generate_area_constraint(1, 2, 3, "four")
+
+    # Mixed numbers and Nones are rejected.
+    with pytest.raises(TypeError):
+        constraints.generate_area_constraint(None, None, None, 0)
+
+
 def test_combine_constraints():
     """Combine constraint."""
     stash_constraint = constraints.generate_stash_constraint("m01s03i236")

From f18255b60cd93dd5f1f21966285df61e584267be Mon Sep 17 00:00:00 2001
From: James Frost
Date: Fri, 20 Sep 2024 04:48:20 +0100
Subject: [PATCH 90/90] Make fetch-data-http.py executable

---
 cset-workflow/app/fetch_fcst/bin/fetch-data-http.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 cset-workflow/app/fetch_fcst/bin/fetch-data-http.py

diff --git a/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py b/cset-workflow/app/fetch_fcst/bin/fetch-data-http.py
old mode 100644
new mode 100755