diff --git a/python/lib/evaluations/README.md b/python/lib/evaluations/README.md index e69de29bb..475e380dc 100644 --- a/python/lib/evaluations/README.md +++ b/python/lib/evaluations/README.md @@ -0,0 +1 @@ +# Distributed Model on Demand - Evaluations \ No newline at end of file diff --git a/python/lib/evaluations/dmod/evaluations/__init__.py b/python/lib/evaluations/dmod/evaluations/__init__.py index ad03cefaf..8b1378917 100644 --- a/python/lib/evaluations/dmod/evaluations/__init__.py +++ b/python/lib/evaluations/dmod/evaluations/__init__.py @@ -1,49 +1 @@ -#!/usr/bin/env python3 -import typing -from argparse import ArgumentParser - - -class Arguments(object): - def __init__(self, *args): - self.__option: typing.Optional[str] = None - - self.__parse_command_line(*args) - - @property - def option(self) -> str: - return self.__option - - def __parse_command_line(self, *args): - parser = ArgumentParser("Put a description for your script here") - - # Add options - parser.add_argument( - "-o", - metavar="option", - dest="option", - type=str, - default="default", - help="This is an example of an option" - ) - - # Parse the list of args if one is passed instead of args passed to the script - if args: - parameters = parser.parse_args(args) - else: - parameters = parser.parse_args() - - # Assign parsed parameters to member variables - self.__option = parameters.option - - -def main(): - """ - Define your initial application code here - """ - arguments = Arguments() - - -# Run the following if the script was run directly -if __name__ == "__main__": - main() diff --git a/python/lib/evaluations/dmod/evaluations/specification/README.md b/python/lib/evaluations/dmod/evaluations/specification/README.md new file mode 100644 index 000000000..f16b18cd3 --- /dev/null +++ b/python/lib/evaluations/dmod/evaluations/specification/README.md @@ -0,0 +1,1342 @@ +# Specification + +Evaluation workflows are highly configurable via the use of required evaluation specifications. There are a lot +of different options, but this level of complexity may be mitigated through the use of [templates](#templates). 
+ +## Table of Contents + +- [A Word on Templates](#templates) +- [Querying Data](#querying) +- [Evaluation Specification](#EvaluationSpecification) + - [Examples](#EvaluationSpecificationExamples) +- [Data Source Specification](#DataSourceSpecification) + - [Examples](#DataSourceSpecificationExamples) +- [Backend Specification](#BackendSpecification) + - [Examples](#BackendSpecificationExamples) +- [Associated Field](#AssociatedField) + - [How to Use Paths](#AssociatedFieldPaths) + - [Examples](#AssociatedFieldExamples) +- [Field Mapping Specification](#FieldMappingSpecification) + - [Examples](#FieldMappingSpecificationExamples) +- [Value Selector](#ValueSelector) + - [How to Use Paths](#ValueSelectorPaths) + - [Examples](#ValueSelectorExamples) +- [Crosswalk Specification](#CrosswalkSpecification) + - [Examples](#CrosswalkSpecificationExamples) +- [Location Specification](#LocationSpecification) + - [Examples](#LocationSpecificationExamples) +- [Metric Specification](#MetricSpecification) + - [Examples](#MetricSpecificationExamples) +- [Threshold Specification](#ThresholdSpecification) + - [Examples](#ThresholdSpecificationExamples) +- [Threshold Definition](#ThresholdDefinition) + - [Examples](#ThresholdDefinitionExamples) +- [Threshold Application Rules](#ThresholdApplicationRules) + - [Examples](#ThresholdApplicationRulesExamples) +- [Unit Definition](#UnitDefinition) + - [Examples](#UnitDefinitionExamples) +- [Scheme Specification](#SchemeSpecification) + - [Examples](#SchemeSpecification) +- [All Specification Elements](#all-elements) + + +## A word on templates + +Templating in evaluation specifications is a means of using preconfigured logic within new configurations. +Many configurations may be the same or they may be mostly the same. Configuring full or partial configurations +and attaching a template name to a configuration will apply the template settings prior to the application of +passed configurations. + +Templates are supported on any model that has the `template_name` property. To use an existing template, +all that must be done to include it is to set the value of `template_name` to it: + +```json +{ + "observations": [ + { + "template_name": "Observation Template" + } + ] +} +``` + +Templates are environment specific - one environment may have an important template while another might not, +but the templates are configurable, so more and more may be created as new use cases arise. Template Manager +constructs (such as the [FileTemplateManager](template.py)) provide all the means necessary to find out what templates are +available. Services providing access to evaluations should provide querying capabilities so that templates may be +reused as much as possible. + + +## Querying Data + +Structured data, such as with JSON, may be queried with the help of [JSONPath](https://goessner.net/articles/JsonPath/), +a query language used as an analog to [XPaths](https://en.wikipedia.org/wiki/XPath). + +Given a document like: + +```json +{ + "a": 5, + "b": [ + { + "value1": 1, + "value2": { + "value3": "This is another value" + }, + "value3": false + }, + { + "value1": 2, + "value2": { + "value3": "This is yet another value that we'll use as an example" + }, + "value3": true + }, + { + "value1": 47, + "value2": { + "value3": "Look at this awesome value!" + }, + "value3": true + } + ] +} +``` + +the query `"$.b[1].value2.value3"` will yield '"This is yet another value that we'll use as an example"'. 
The +`$` character instructs the search operations to start looking at the root of the document. The next instruction, +`b` tells the search operation to look for values under `b`. `[1]` tells the operation to then look in the +second member of the collection held under `b`. `value2` tells the search process to _then_ search under the `value2` +object where the final `value3` instruction retrieves the value belonging to `value3`. + +The equivalent hardcoded instructions in python would be: + +```python +example = { + "a": 5, + "b": [ + { + "value1": 1, + "value2": { + "value3": "This is another value" + }, + "value3": False + }, + { + "value1": 2, + "value2": { + "value3": "This is yet another value that we'll use as an example" + }, + "value3": True + }, + { + "value1": 47, + "value2": { + "value3": "Look at this awesome value!" + }, + "value3": True + } + ] +} + +equivalent_value = example['b'][1]['value2']['value3'] +print(equivalent_value) +# Output: This is yet another value that we'll use as an example +``` + +Queries don't have to start at the root, but it _is_ advised. a query such as `"value1"` would yield `[1, 2, 47]`, but +a query of `"value3"` would yield `["This is another value", false, "This is yet another value that we'll use as an +example", true, "Look at this awesome value!", true]`. + +Investigate [Associated Fields](#AssociatedField) and [Value Selectors](#ValueSelector) to see how paths are used in +practice. + + +## Evaluation Specification + +![Instructions for how different aspect of an evaluation should work](../../../images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png) + +The [Evaluation Specification](evaluation.py) is the primary unit of configuration required in order to carry +out evaluations. This element contains every bit of needed information, from where to load what data to what +metrics to run on it. Each evaluation will require one and only one evaluation specification. + + +## Data Source Specification + +![Specification for where to get the actual data for evaluation](../../../images/dmod.evaluations.specification.data.DataSourceSpecification.png) + +The [Data Source Specification](data.py) is the block of configuration responsible for loading a set of data +that will be joined with others for evaluation. Loading data by using instructions provided by a `DataSourceSpecification` +will yield a Data Frame that will be ready for manipulation. + +The most important aspects defined by a `DataSourceSpecification` are: + +1. What fields to load +2. What the data is [measured in or how to find out](#unit-definition) +3. What locations are represented within the data or how to find out +4. How to load and interpret the raw data +5. What field within the resultant data should be used for all calculations. + + +## Backend Specification + +![A specification for how data should be loaded](../../../images/dmod.evaluations.specification.backend.BackendSpecification.png) + +[Backend Specifications](backend.py) dictate how data is loaded. As of writing, there are two different +backend types: files and `REST`. Unlike a lot of the other configuration types, the `properties` attribute may be +truly important in that some readers require extra information. `REST` calls may require extra parameters. 
+For instance, reaching out to NWIS for instantaneous streamflow data may require information such as a comma-delimited
+list of sites to retrieve data for, a `startDT` and `endDT` to indicate the timeframe of the data to
+retrieve, and a `parameterCd`, which dictates what data to pull back (`00060` would be streamflow).
+
+Expect these parameters to matter more for services than for local files because of the added complexity
+that querying brings.
+
+
+### Examples
+Load a local RDB file stored at "resources/nwis_stat_thresholds.rdb":
+```json
+{
+  "backend_type": "file",
+  "format": "rdb",
+  "address": "resources/nwis_stat_thresholds.rdb"
+}
+```
+
+Retrieve streamflow data from NWIS' Instantaneous Values service for locations "0214657975" and
+"0214655255", with values occurring between midnight 2022-12-01 and midnight 2022-12-31:
+```json
+{
+  "backend_type": "rest",
+  "format": "json",
+  "address": "https://nwis.waterservices.usgs.gov/nwis/iv",
+  "params": {
+    "format": "json",
+    "indent": "on",
+    "sites": "0214657975,0214655255",
+    "startDT": "2022-12-01T00:00%2b0000",
+    "endDT": "2022-12-31T00:00%2b0000",
+    "parameterCd": "00060"
+  }
+}
+```
+
+Use the "Instantaneous NWIS Streamflow" template to retrieve streamflow data for location "0214657975"
+between midnight 2023-09-01 and midnight 2023-09-14:
+```json
+{
+  "template_name": "Instantaneous NWIS Streamflow",
+  "params": {
+    "sites": "0214657975",
+    "startDT": "2023-09-01T00:00%2b0000",
+    "endDT": "2023-09-14T00:00%2b0000"
+  }
+}
+```
+
+Retrieve data from "path/to/file.json" in the style handled by the "JSON File" template:
+```json
+{
+  "template_name": "JSON File",
+  "address": "path/to/file.json"
+}
+```
+
+
+## Associated Field
+
+![A specification for additional data that should accompany selected data](../../../images/dmod.evaluations.specification.fields.AssociatedField.png)
+
+Configuring [Associated Fields](fields.py) dictates what data should accompany the selected values.
+Retrieving data via a [Value Selector](#ValueSelector) might return a series of values, but those values alone may not
+carry enough context. For example, selecting values in NWIS JSON may be performed by gathering
+data from `"values[*].value[*].value"`, but that doesn't tell you _when_ those values occurred. If you associate
+those values with the date times from `"values[*].value[*].dateTime"`, however, you'll have a set of data containing
+values _and_ context.
+
+
+### Examples
+
+Use data at `sourceInfo.siteCode[0].value`, starting from the current origin, as the accompanying location
+for the currently identified measurement:
+```json
+{
+  "name": "observation_location",
+  "path": ["sourceInfo", "siteCode", "[0]", "value"],
+  "datatype": "string"
+}
+```
+
+Consider the adjacent `date` field as a `datetime` object for each measurement that is read:
+```json
+{
+  "name": "date",
+  "datatype": "datetime"
+}
+```
+
+
+## Field Mapping Specification
+
+![Details on how a field should be aliased](../../../images/dmod.evaluations.specification.fields.FieldMappingSpecification.png)
+
+Field mapping allows fields that will appear during evaluation and in outputs to be renamed. Some sources provide
+hard-to-understand or uncommon names. For instance, some inputs will have a variable named `Q_out` that
+will need to be used alongside data named `streamflow` or `discharge`. Renaming fields allows for common terms
+across many different types of evaluations and makes downstream tooling easier to build.
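+
+Conceptually, a `column` type mapping is little more than a column rename. As a rough illustration (not the
+library's actual implementation), the mapping in the first example below behaves like this pandas sketch, where
+the source column `date` becomes the common field `value_date`:
+
+```python
+import pandas
+
+# Hypothetical source data whose columns use source-specific names
+predictions = pandas.DataFrame({
+    "date": ["2023-10-01 00:00", "2023-10-01 01:00"],
+    "Q_out": [37.8, 37.6],
+})
+
+# A field mapping such as {"field": "value_date", "map_type": "column", "value": "date"}
+# amounts to renaming the source column to the common field name used throughout the evaluation
+predictions = predictions.rename(columns={"date": "value_date", "Q_out": "prediction"})
+
+print(list(predictions.columns))  # ['value_date', 'prediction']
+```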
+
+
+### Examples
+
+Rename the "date" field as "value_date" for processing:
+```json
+{
+  "field": "value_date",
+  "map_type": "column",
+  "value": "date"
+}
+```
+
+Consider the value of "site_no" as the field named "location":
+```json
+{
+  "field": "location",
+  "map_type": "value",
+  "value": "site_no"
+}
+```
+
+
+## Value Selector
+
+![Instructions for how to retrieve values from a data source](../../../images/dmod.evaluations.specification.fields.ValueSelector.png)
+
+[Value Selectors](fields.py) are the most important part of any data source configuration: they define what fields
+should be loaded and how. Data that has been loaded from a backend may provide raw values, but it is the job
+of Value Selectors to investigate that data and extract what to use. Say a CSV file contains the fields
+`d`, `flow`, `temperature`, `dew point`, `age`, `altitude`, `loc`, `turbidity`, `region`, `state`, `operator`, and
+`stage` and looks like:
+
+| d                | flow | temperature | dew point | age | altitude | loc               | turbidity | region | state | operator | stage |
+|------------------|------|-------------|-----------|-----|----------|-------------------|-----------|--------|-------|----------|-------|
+| 2023/10/01 00:00 | 37.8 | 25          |           | 45  | 1054.24  | SOME LOCATION, AL | `null`    | SERFC  | AL    | USACE    | 14    |
+| 2023/10/01 01:00 | 37.6 | 24.8        |           | 55  | 1054.24  | SOME LOCATION, AL | `null`    | SERFC  | AL    | USACE    | 13    |
+
+
+A configuration like:
+
+```json
+{
+  "name": "streamflow",
+  "where": "column",
+  "path": "flow",
+  "datatype": "float",
+  "associated_fields": [
+    {
+      "name": "value_date",
+      "path": "d",
+      "datatype": "datetime"
+    },
+    {
+      "name": "location",
+      "path": "loc",
+      "datatype": "string"
+    }
+  ]
+}
+```
+
+will transform that into:
+
+| value_date               | streamflow | location          |
+|--------------------------|------------|-------------------|
+| 2023-10-01 00:00:00-0000 | 37.8       | SOME LOCATION, AL |
+| 2023-10-01 01:00:00-0000 | 37.6       | SOME LOCATION, AL |
+
+
+### Examples
+
+Use each value located at `"values[*].value[*].value"`, starting from every node found at
+`"$.value.timeSeries[*]"`, as a floating-point number for a field named `observation`. When selecting that value,
+also select `"values[*].value[*].dateTime"` as a `datetime` field named `"value_date"`, `"sourceInfo.siteCode[0].value"`
+as a string for a field named `"observation_location"`, and `"variable.unit.unitCode"` as a string field named `"unit"`.
+ +```json +{ + "name": "observation", + "where": "value", + "path": ["values[*]", "value[*]", "value"], + "datatype": "float", + "origin": ["$", "value", "timeSeries[*]"], + "associated_fields": [ + { + "name":"value_date", + "path": ["values[*]", "value[*]", "dateTime"], + "datatype": "datetime" + }, + { + "name":"observation_location", + "path": ["sourceInfo", "siteCode", "[0]", "value"], + "datatype": "string" + }, + { + "name":"unit", + "path": ["variable", "unit", "unitCode"], + "datatype": "string" + } + ] +} +``` + +This will select values that might look like: + +| observation | value_date | observation_location | unit | +|-------------|-------------------------------|----------------------|-------| +| 46.9 | 2015-11-30T20:00:00.000-05:00 | 0214655255 | ft3/s | +| 50.2 | 2015-11-30T20:05:00.000-05:00 | 0214655255 | ft3/s | +| 48.2 | 2015-11-30T20:10:00.000-05:00 | 0214655255 | ft3/s | + +The following might yield the same result: +```json +{ + "name": "observation", + "where": "value", + "path": ["values[*]", "value[*]", "value"], + "datatype": "float", + "origin": ["$", "value", "timeSeries[*]"], + "associated_fields": [ + { + "template_name": "NWIS Value Date" + }, + { + "template_name": "NWIS Observation Location" + }, + { + "template_name": "NWIS Unit" + } + ] +} +``` + +Use the column named '"predicted"' and match it with the adjacent column named '"date"': +```json +{ + "name": "predicted", + "where": "column", + "associated_fields": [ + { + "name": "date", + "datatype": "datetime" + } + ] +} +``` + + +## Crosswalk Specification + +![Specifies how locations in the observations should be linked to locations in the predictions](../../../images/dmod.evaluations.specification.locations.CrosswalkSpecification.png) + +[Crosswalk Specifications](locations.py) inform the system about how it should link observations to forecasts based +on location by loading up data to link + +Say you have the following two data sets: + +**Predictions** + +| loc | value | unit | time | +|-----|-------|--------|-------------------| +| 1 | 324 | ft3/s | 2021-10-14 00:00 | +| 1 | 322 | ft3/s | 2021-10-14 01:00 | +| 2 | 14 | ft3/s | 2021-10-14 00:00 | +| 2 | 13 | ft3/s | 2021-10-14 01:00 | + +**Observations** + +| site_no | measurement | unit | stage | valid_time | +|------------|-------------|-------|-------|------------------| +| 0446846846 | 14 | cms | 8 | 2021-10-14 00:00 | +| 0446846846 | 18 | cms | 9 | 2021-10-14 01:00 | +| 668465168 | 7 | cms | 5 | 2021-10-14 00:00 | +| 668465168 | 6 | cms | 4.8 | 2021-10-14 01:00 | + + +A Crosswalk Specification like: +```json +{ + "backend": { + "backend_type": "file", + "address": "resources/crosswalk.json", + "format": "json" + }, + "observation_field_name": "site_no", + "prediction_field_name": "loc", + "field": { + "name": "loc", + "where": "key", + "path": ["* where site_no"], + "origin": "$", + "datatype": "string", + "associated_fields": [ + { + "name": "site_no", + "path": "site_no", + "datatype": "string" + } + ] + } +} +``` + +will read the data from `"resources/crosswalk.json"` that looks like: + +```json +{ + "1": { + "value1": 1, + "site_no": "668465168" + }, + "2": { + "value1": 2, + "site_no": "0446846846" + }, + "3": { + "value1": 3 + }, + "4": { + "value1": 4 + } +} +``` + +And determine that rows from the **Prediction** dataset with a `loc` value of `1` should link to rows from the +**Observation** dataset with a `site_no` value of `668465168`. 
The keys `"3"` and `"4"` will be totally ignored since
+the path `* where site_no` means "everything that has a member named `site_no`", and `"3"` and `"4"` lack that member.
+
+
+### Examples
+
+Load the local `JSON` file at "resources/crosswalk.json" and extract the keys found at `"* where site_no"`
+(everything that has a `site_no` field) to use as a "prediction_location", using each key's contained
+`"site_no"` value as a field named "observation_location":
+```json
+{
+  "backend": {
+    "backend_type": "file",
+    "address": "resources/crosswalk.json",
+    "format": "json"
+  },
+  "observation_field_name": "observation_location",
+  "prediction_field_name": "prediction_location",
+  "field": {
+    "name": "prediction_location",
+    "where": "key",
+    "path": ["* where site_no"],
+    "origin": "$",
+    "datatype": "string",
+    "associated_fields": [
+      {
+        "name": "observation_location",
+        "path": "site_no",
+        "datatype": "string"
+      }
+    ]
+  }
+}
+```
+
+Using templates, this may be represented as:
+```json
+{
+  "backend": {
+    "template_name": "JSON File",
+    "address": "resources/crosswalk.json"
+  },
+  "observation_field_name": "observation_location",
+  "prediction_field_name": "prediction_location",
+  "field": {
+    "template_name": "Prediction Key to Observed Site Crosswalk"
+  }
+}
+```
+
+This might yield something that looks like:
+
+| observation_location | prediction_location |
+|----------------------|---------------------|
+| 0214655255           | cat-52              |
+| 02146562             | cat-67              |
+| 0718735243           | cat-27              |
+
+The following JSON will instruct evaluations to pair observed data with predicted data where the observation's
+`observation_location` field matches the prediction's `prediction_location` field:
+
+```json
+{
+  "observation_field_name": "observation_location",
+  "prediction_field_name": "prediction_location"
+}
+```
+
+
+## Location Specification
+
+![A specification for where location data should be found](../../../images/dmod.evaluations.specification.locations.LocationSpecification.png)
+
+[Location Specifications](locations.py) define where to find identifiers for locations, whether inside or outside of
+the loaded data. Some data may have locations in columns, while some may have locations in filenames.
+
+
+### Examples
+
+Identify locations as those found in the `site_no` column:
+```json
+{
+  "identify": true,
+  "from_field": "site_no"
+}
+```
+
+Identify location names like `cat-27` and `cat-52` from filenames such as `cat-27.csv` and `cat-52_cms.csv`:
+```json
+{
+  "identify": true,
+  "from_field": "filename",
+  "pattern": "cat-\\d\\d"
+}
+```
+
+
+
+## Threshold Definition
+
+![A definition of a single threshold, where it comes from, and its significance](../../../images/dmod.evaluations.specification.threshold.ThresholdDefinition.png)
+
+[Threshold Definitions](threshold.py) define what thresholds to apply to data, where to get their values,
+how they are measured, what to call them, and how important they are. Weights in these threshold definitions operate
+the same way as the weights for [Metric Specifications](#MetricSpecification), except the values are relative to other
+threshold definitions, not metrics.
+
+The name of a threshold is optional, but naming thresholds makes their meaning clearer. For instance, the `p75_va`
+field in NWIS Statistical Thresholds represents the `75th Percentile`. Someone familiar with the dataset
+may understand the raw definition, but someone _not_ familiar with these thresholds won't understand what it means.
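+
+To make the role of a threshold a little more concrete, the following pandas sketch shows roughly what applying a
+named, weighted threshold to paired data amounts to. It is illustrative only and assumes, for the sake of the
+example, that a threshold keeps the rows where the observation meets or exceeds the threshold value:
+
+```python
+import pandas
+
+# Hypothetical paired data for a single location
+pairs = pandas.DataFrame({
+    "observation": [12.0, 48.0, 95.0, 20.0],
+    "prediction": [10.0, 55.0, 90.0, 30.0],
+})
+
+# A value that would come from a field like `p75_va` in the threshold data
+seventy_fifth_percentile = 45.0
+
+# Only the rows selected by the threshold feed that threshold's scores;
+# the definition's weight then controls how much those scores count overall
+rows_over_threshold = pairs[pairs["observation"] >= seventy_fifth_percentile]
+print(len(rows_over_threshold))  # 2
+```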
+
+
+### Examples
+
+Use a threshold named `75th Percentile` with values from the `p75_va` field, measured in `ft^3/s`, with a weight of 10:
+```json
+{
+  "name": "75th Percentile",
+  "field": "p75_va",
+  "weight": 10,
+  "unit": {
+    "value": "ft^3/s"
+  }
+}
+```
+
+
+## Threshold Application Rules
+
+![Added rules for how thresholds should be applied](../../../images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png)
+
+Thresholds and the data to evaluate may not always be in the same form or scale. USGS NWIS Statistical Thresholds,
+for example, are daily values, with each day defined by `day_nu` and `month_nu` integer values. Predicted or
+observed values, though, have their temporal values defined via year, month, day, hours, and seconds. In order
+to link the correct thresholds to the correct values, [Threshold Application Rules](threshold.py) may be used to
+transform fields so that thresholds can be correctly matched to evaluation data.
+
+
+### Examples
+
+Link thresholds to observation data by creating two new columns, both named `threshold_day`: one on the threshold
+data, created by converting the `month_nu` and `day_nu` integer fields into a single `Day` field, and one on the
+observation data, created by converting the `value_date` field into a `Day` field:
+```json
+{
+  "name": "Date to Day",
+  "threshold_field": {
+    "name": "threshold_day",
+    "path": [
+      "month_nu",
+      "day_nu"
+    ],
+    "datatype": "Day"
+  },
+  "observation_field": {
+    "name": "threshold_day",
+    "path": [
+      "value_date"
+    ],
+    "datatype": "Day"
+  }
+}
+```
+
+`Day` is a `dmod.evaluations`-specific type that describes a single day of the year across any year, including
+leap years. If I have a day of `10/1` and data for the years `2016`, `2017`, and `2018`, that `10/1` will be
+equivalent to `2016-10-01`, `2017-10-01`, and `2018-10-01`.
+
+A `Day` may be defined by passing a numerical day of the year, a string date, a python date type, a pandas date type,
+a numpy date type, or a sequence of numbers interpreted as `[day of year]`, `[month, day]`, or
+`[year, month, day]`. `[1]`, `[1, 1]`, and `[1975, 1, 1]` will all create the same `Day` value.
+
+
+## Threshold Specification
+
+![Instructions for how to load and apply thresholds to observed and predicted data](../../../images/dmod.evaluations.specification.threshold.ThresholdSpecification.png)
+
+[Threshold Specifications](threshold.py) define how threshold data should be loaded, how to apply thresholds to data,
+and what thresholds to use.
+
+
+### Examples
+
+The following two examples load an RDB file named `resources/nwis_stat_thresholds.rdb`, identify locations from the
+`site_no` column, and match thresholds to observations through two new columns: `threshold_day` on the threshold
+data, created by converting the `month_nu` and `day_nu` fields into `Day` objects, and `threshold_day` on the
+observation data, created by converting the `value_date` field into `Day` objects. The `p75_va`, `p80_va`, and
+`p50_va` fields are used as thresholds named `"75th Percentile"`, `"80th Percentile"`, and `"Median"`, measured in
+`ft^3/s` and weighted `10`, `5`, and `1`, respectively.
+The `75th Percentile` threshold will be considered 10 times as important as the `Median`, while the
+`80th Percentile` will be considered half as important as the `75th Percentile` but 5 times as important as the
+`Median`.
+
+```json
+{
+  "name": "NWIS Stat Percentiles",
+  "backend": {
+    "backend_type": "file",
+    "format": "rdb",
+    "address": "resources/nwis_stat_thresholds.rdb"
+  },
+  "locations": {
+    "identify": true,
+    "from_field": "column",
+    "pattern": "site_no"
+  },
+  "application_rules": {
+    "threshold_field": {
+      "name": "threshold_day",
+      "path": [
+        "month_nu",
+        "day_nu"
+      ],
+      "datatype": "Day"
+    },
+    "observation_field": {
+      "name": "threshold_day",
+      "path": [
+        "value_date"
+      ],
+      "datatype": "Day"
+    }
+  },
+  "definitions": [
+    {
+      "name": "75th Percentile",
+      "field": "p75_va",
+      "weight": 10,
+      "unit": {
+        "value": "ft^3/s"
+      }
+    },
+    {
+      "name": "80th Percentile",
+      "field": "p80_va",
+      "weight": 5,
+      "unit": {
+        "value": "ft^3/s"
+      }
+    },
+    {
+      "name": "Median",
+      "field": "p50_va",
+      "weight": 1,
+      "unit": {
+        "value": "ft^3/s"
+      }
+    }
+  ]
+}
+```
+
+The following example can produce the exact same results as the example above, but through the use of templates:
+
+```json
+{
+  "backend": {
+    "template_name": "NWIS Stat Thresholds"
+  },
+  "locations": {
+    "template_name": "Site Number Column"
+  },
+  "application_rules": {
+    "template_name": "Date to Day"
+  },
+  "definitions": [
+    {
+      "template_name": "75th Percentile"
+    },
+    {
+      "template_name": "80th Percentile"
+    },
+    {
+      "template_name": "Median"
+    }
+  ]
+}
+```
+
+
+## Unit Definition
+
+![A definition of what a measurement unit is or where to find it](../../../images/dmod.evaluations.specification.unit.UnitDefinition.png)
+
+[Unit Definitions](unit.py) tell the system how to interpret the values from data sources. Predictions, for instance,
+may be expressed in cubic meters per second (`cms`), while observations may be expressed in cubic _feet_ per second
+(`ft^3/s`). Values in `cms` and `ft^3/s` aren't immediately comparable, so the units need to be explicitly stated in
+case unit conversions are necessary. There are three options for how to get the unit. If a `value` of `cms`
+is given, the system will interpret all primary values from the data source as being measured in `cms`. If a
+`field` of `unit` is given, the unit will be interpreted as whatever lies within the `unit` field of the
+selected data. The former option is great for cases where the unit isn't in the dataset and is instead known via
+institutional knowledge, while the latter is great for cases where the unit _is_ in the dataset.
+
+Any type of unit may be used, but unit conversions will occur if units are not the same. When these conversions take place,
+only the stock units available in [Pint](https://github.com/hgrecco/pint/blob/master/pint/default_en.txt), along with
+m3, ft3, cms, cfs, and KCFS (case insensitive), may be used.
+
+
+### Examples
+
+Use the values in the `unit` field as the name of the measurement unit:
+```json
+{
+  "field": "unit"
+}
+```
+
+Use the value `"ft^3/s"` as the unit of measurement for every piece of data loaded in this context:
+```json
+{
+  "value": "ft^3/s"
+}
+```
+
+
+
+## Metric Specification
+
+![The definition for what metric should be used and how important it should be](../../../images/dmod.evaluations.specification.scoring.MetricSpecification.png)
+
+[Metric Specifications](scoring.py) merely define what metric is intended to be used and how important it is. The
+`weight` value only bears significance relative to _other_ defined `weight` values. If only one Metric Specification is
+defined, the `weight` doesn't have much value since there isn't anything to compare it to.
+It's similar in the case where _all_ defined Metric Specifications have the same weight - in this case the results
+are all equally important.
+
+The weight becomes significant when there are varying weight values. Given Metric Specifications with weights `1`,
+`2`, `3`, and `4`, the last Metric Specification is considered the most important metric while the first is
+the least important. When scores are averaged, that last metric will have a far greater impact on the results than the
+first metric.
+
+Since the values are all relative, changing the above weights to `4`, `8`, `12`, and `16` will yield the same results.
+
+There are no rules for how `weight` values are defined. They may be arbitrarily high or arbitrarily low. What matters
+is how they relate to one another.
+
+
+### Examples
+
+Use the metric named "Pearson Correlation Coefficient" with a relative weight of `10`:
+```json
+{
+  "name": "Pearson Correlation Coefficient",
+  "weight": 10
+}
+```
+
+Use the metric named "pRoBabIliTyOfDeTecTiOn" with a relative weight of `4`:
+```json
+{
+  "name": "pRoBabIliTyOfDeTecTiOn",
+  "weight": 4
+}
+```
+
+Using the above two examples at the same time will tell the evaluation that the result of the
+"Pearson Correlation Coefficient" is 2.5 times as important as the result of "Probability of Detection".
+
+
+## Scheme Specification
+
+![Instructions for how metrics should be applied to observations and forecasts along with how to interpret them](../../../images/dmod.evaluations.specification.scoring.SchemeSpecification.png)
+
+A [Scheme Specification](scoring.py) defines the overall scoring scheme for the entire evaluation. It dictates what
+metrics to use and how important they are relative to one another.
+
+
+### Examples
+
+Use the metrics "Pearson Correlation Coefficient", "Normalized Nash-Sutcliffe Efficiency",
+"Kling-Gupta Efficiency", "Probability of Detection", and "False Alarm Ratio", but consider "Pearson Correlation Coefficient"
+the most important metric, followed by "Normalized Nash-Sutcliffe Efficiency" and "Kling-Gupta Efficiency", then by
+"False Alarm Ratio" and "Probability of Detection".
+
+```json
+{
+  "metrics": [
+    {
+      "name": "False Alarm Ratio",
+      "weight": 10
+    },
+    {
+      "name": "Probability of Detection",
+      "weight": 10
+    },
+    {
+      "name": "Kling-Gupta Efficiency",
+      "weight": 15
+    },
+    {
+      "name": "Normalized Nash-Sutcliffe Efficiency",
+      "weight": 15
+    },
+    {
+      "name": "Pearson Correlation Coefficient",
+      "weight": 18
+    }
+  ]
+}
+```
+
+
+### Data Source Specification Examples
+The following examples all describe the exact same data source.
+
+The primary field for this data source will be called `observation`, which will be populated by retrieving all values
+from `values[*].value[*].value` relative to `$.value.timeSeries[*]` and interpreting them as `float`s. Each of these
+`observation` values will be accompanied by their corresponding `dateTime` found at `values[*].value[*].dateTime`,
+the location identifier at `sourceInfo.siteCode[0].value`, and the unit of measurement at `variable.unit.unitCode`,
+all relative to `$.value.timeSeries[*]`.
+
+The data will be loaded from a JSON file at "resources/observations.json".
+
+The locations are found in the `observation_location` field and the measurement unit is found in the `unit`
+field.
+
+The data will be paired and evaluated by a matching `value_date` value.
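+
+To make "paired and evaluated by a matching `value_date`" concrete, the pairing this data source feeds into boils
+down to a join on the shared `x_axis` field. The pandas sketch below is illustrative only (the values are made up,
+and this is not the library's actual pairing code):
+
+```python
+import pandas
+
+observations = pandas.DataFrame({
+    "value_date": pandas.to_datetime(["2015-12-01 01:00", "2015-12-01 02:00"]),
+    "observation": [46.9, 50.2],
+})
+predictions = pandas.DataFrame({
+    "value_date": pandas.to_datetime(["2015-12-01 01:00", "2015-12-01 02:00"]),
+    "prediction": [44.1, 51.3],
+})
+
+# `x_axis` names the shared axis (`value_date` here) and `value_field` names each source's value column;
+# pairing amounts to joining the two frames on that shared axis
+pairs = observations.merge(predictions, on="value_date")
+print(pairs.columns.tolist())  # ['value_date', 'observation', 'prediction']
+```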
+ +A full configuration using no templates +```json +{ + "value_field": "observation", + "value_selectors": [ + { + "name": "observation", + "where": "value", + "path": ["values[*]", "value[*]", "value"], + "datatype": "float", + "origin": ["$", "value", "timeSeries[*]"], + "associated_fields": [ + { + "name":"value_date", + "path": ["values[*]", "value[*]", "dateTime"], + "datatype": "datetime" + }, + { + "name":"observation_location", + "path": ["sourceInfo", "siteCode", "[0]", "value"], + "datatype": "string" + }, + { + "name":"unit", + "path": ["variable", "unit", "unitCode"], + "datatype": "string" + } + ] + } + ], + "backend": { + "backend_type": "file", + "format": "json", + "address": "resources/observations.json" + }, + "locations": { + "identify": true, + "from_field": "observation_location" + }, + "unit": { + "field": "unit" + }, + "x_axis": "value_date" +} +``` + +This configuration may be simplified by utilizing a few templates. The first template used, `JSON File`, simplifies +the definition for how to load JSON files by only requiring a path to the file. The second template used, +`From Observation` lets you bypass the configuration for a location by just using the common declaration. + +```json +{ + "value_field": "observation", + "value_selectors": [ + { + "name": "observation", + "where": "value", + "path": ["values[*]", "value[*]", "value"], + "datatype": "float", + "origin": ["$", "value", "timeSeries[*]"], + "associated_fields": [ + { + "name":"value_date", + "path": ["values[*]", "value[*]", "dateTime"], + "datatype": "datetime" + }, + { + "name":"observation_location", + "path": ["sourceInfo", "siteCode", "[0]", "value"], + "datatype": "string" + }, + { + "name":"unit", + "path": ["variable", "unit", "unitCode"], + "datatype": "string" + } + ] + } + ], + "backend": { + "template_name": "JSON File", + "address": "resources/observations.json" + }, + "locations": { + "template_name": "From Observation" + }, + "unit": { + "field": "unit" + }, + "x_axis": "value_date" +} +``` + +Templates may be used for just about every field. The most complicated aspect of the above configuration was the +complexity of parsing NWIS JSON WaterML responses. All of that may be bypassed by instead using the `NWIS Record` +template, which will supply the full and correct queries for every field needed when using that format. When available, +no user should need to define any of that themselves. 
+ +```json +{ + "value_field": "observation", + "value_selectors": [ + { + "template_name": "NWIS Record" + } + ], + "backend": { + "template_name": "JSON File", + "address": "resources/observations.json" + }, + "locations": { + "template_name": "From Observation" + }, + "unit": { + "field": "unit" + }, + "x_axis": "value_date" +} +``` + + +### Evaluation Specification Examples + +The following examples all describe the exact same evaluation: + +A full configuration using no templates +```json +{ + "observations": [ + { + "name": "Observations", + "value_field": "observation", + "value_selectors": [ + { + "name": "observation", + "where": "value", + "path": ["values[*]", "value[*]", "value"], + "datatype": "float", + "origin": ["$", "value", "timeSeries[*]"], + "associated_fields": [ + { + "name":"value_date", + "path": ["values[*]", "value[*]", "dateTime"], + "datatype": "datetime" + }, + { + "name":"observation_location", + "path": ["sourceInfo", "siteCode", "[0]", "value"], + "datatype": "string" + }, + { + "name":"unit", + "path": ["variable", "unit", "unitCode"], + "datatype": "string" + } + ] + } + ], + "backend": { + "backend_type": "file", + "format": "json", + "address": "resources/observations.json" + }, + "locations": { + "identify": true, + "from_field": "value" + }, + "unit": { + "field": "unit" + }, + "x_axis": "value_date" + } + ], + "predictions": [ + { + "name": "Predictions", + "value_field": "prediction", + "value_selectors": [ + { + "name": "predicted", + "where": "column", + "associated_fields": [ + { + "name": "date", + "datatype": "datetime" + } + ] + } + ], + "backend": { + "backend_type": "file", + "format": "csv", + "address": "resources/cat.*cfs.csv", + "parse_dates": ["date"] + }, + "locations": { + "identify": true, + "from_field": "filename", + "pattern": "cat-\\d\\d" + }, + "field_mapping": [ + { + "field": "prediction", + "map_type": "column", + "value": "predicted" + }, + { + "field": "prediction_location", + "map_type": "column", + "value": "location" + }, + { + "field": "value_date", + "map_type": "column", + "value": "date" + } + ], + "unit": { + "value": "ft^3/s" + }, + "x_axis": "value_date" + } + ], + "crosswalks": [ + { + "name": "Crosswalk", + "backend": { + "backend_type": "file", + "address": "resources/crosswalk.json", + "format": "json" + }, + "observation_field_name": "observation_location", + "prediction_field_name": "prediction_location", + "field": { + "name": "prediction_location", + "where": "key", + "path": ["* where site_no"], + "origin": "$", + "datatype": "string", + "associated_fields": [ + { + "name": "observation_location", + "path": "site_no", + "datatype": "string" + } + ] + } + } + ], + "thresholds": [ + { + "name": "NWIS Stat Percentiles", + "backend": { + "name": "NWIS Stat Thresholds", + "backend_type": "file", + "format": "rdb", + "address": "resources/nwis_stat_thresholds.rdb" + }, + "locations": { + "identify": true, + "from_field": "column", + "pattern": "site_no" + }, + "application_rules": { + "name": "Date to Day", + "threshold_field": { + "name": "threshold_day", + "path": [ + "month_nu", + "day_nu" + ], + "datatype": "Day" + }, + "observation_field": { + "name": "threshold_day", + "path": [ + "value_date" + ], + "datatype": "Day" + } + }, + "definitions": [ + { + "name": "75th Percentile", + "field": "p75_va", + "weight": 10, + "unit": { + "value": "ft^3/s" + } + }, + { + "name": "80th Percentile", + "field": "p80_va", + "weight": 5, + "unit": { + "value": "ft^3/s" + } + }, + { + "name": "Median", + "field": "p50_va", 
+ "weight": 1, + "unit": { + "value": "ft^3/s" + } + } + ] + } + ], + "scheme": { + "name": "Prefer Pearson, then Nash and Kling, then POD and FAR", + "metrics": [ + { + "name": "False Alarm Ratio", + "weight": 10 + }, + { + "name": "Probability of Detection", + "weight": 10 + }, + { + "name": "Kling-Gupta Efficiency", + "weight": 15 + }, + { + "name": "Normalized Nash-Sutcliffe Efficiency", + "weight": 15 + }, + { + "name": "Pearson Correlation Coefficient", + "weight": 18 + } + ] + } +} +``` + +A configuration using templates + +```json +{ + "observations": [ + { + "template_name": "REST Observations", + "backend": { + "params": { + "sites": "0214657975,0214655255", + "startDT": "2022-12-01T00:00%2b0000", + "endDT": "2022-12-31T00:00%2b0000" + } + } + } + ], + "predictions": [ + { + "template_name": "Predictions" + } + ], + "crosswalks": [ + { + "template_name": "Templated Crosswalk" + } + ], + "thresholds": [ + { + "template_name": "All Templates for NWIS Stat Percentiles" + } + ], + "scheme": { + "template_name": "Prefer Pearson, then Nash and Kling, then POD and FAR" + } +} +``` + +A configuration using templates with overridden values + +```json +{ + "observations": [ + { + "template_name": "Observations from Templates" + } + ], + "predictions": [ + { + "template_name": "Predictions" + } + ], + "crosswalks": [ + { + "template_name": "Templated Crosswalk" + } + ], + "thresholds": [ + { + "template_name": "All Templates for NWIS Stat Percentiles" + } + ], + "scheme": { + "template_name": "Prefer Pearson, then Nash and Kling, then POD and FAR" + } +} +``` + + +## All Elements + +When put together, the entire object tree looks like: + +![All Specifications](../../../images/all-from-dmod.evaluations.specification.png) \ No newline at end of file diff --git a/python/lib/evaluations/dmod/evaluations/specification/__init__.py b/python/lib/evaluations/dmod/evaluations/specification/__init__.py index 6ab28e36d..41ae5d429 100644 --- a/python/lib/evaluations/dmod/evaluations/specification/__init__.py +++ b/python/lib/evaluations/dmod/evaluations/specification/__init__.py @@ -22,13 +22,33 @@ import typing +SPECIFICATION_TYPES = typing.Sequence[typing.Type[Specification]] -def get_specification_options(*args, **kwargs) -> typing.Sequence[typing.Tuple[str, str]]: +def get_specification_types(all_specifications: bool = False, *args, **kwargs) -> SPECIFICATION_TYPES: from .base import get_subclasses + if all_specifications: + base_class = Specification + else: + base_class = TemplatedSpecification + + return get_subclasses(base_class) + + +def get_specification_options(all_specifications: bool = False, *args, **kwargs) -> typing.Sequence[typing.Tuple[str, str]]: + from .base import get_subclasses + + if all_specifications: + base_class = Specification + else: + base_class = TemplatedSpecification + return [ - (cls.get_specification_type(), cls.get_specification_description()) - for cls in get_subclasses(TemplatedSpecification) + ( + cls.get_specification_type(), + cls.get_specification_description() + ) + for cls in get_subclasses(base_class) ] diff --git a/python/lib/evaluations/dmod/evaluations/specification/scoring.py b/python/lib/evaluations/dmod/evaluations/specification/scoring.py index b0738260d..fd509073d 100644 --- a/python/lib/evaluations/dmod/evaluations/specification/scoring.py +++ b/python/lib/evaluations/dmod/evaluations/specification/scoring.py @@ -20,6 +20,9 @@ class MetricSpecification(TemplatedSpecification): + """ + The definition for what metric should be used and how important it 
should be + """ weight: typing.Union[float] = Field(description="A relative rating of the significance of this metric") def __eq__(self, other: MetricSpecification) -> bool: @@ -52,6 +55,9 @@ def apply_configuration( class SchemeSpecification(TemplatedSpecification): + """ + Instructions for how metrics should be applied to observations and forecasts along with how to interpret them + """ class Config: fields = { "metric_functions": { diff --git a/python/lib/evaluations/dmod/evaluations/specification/threshold.py b/python/lib/evaluations/dmod/evaluations/specification/threshold.py index 1da4a7595..c26861268 100644 --- a/python/lib/evaluations/dmod/evaluations/specification/threshold.py +++ b/python/lib/evaluations/dmod/evaluations/specification/threshold.py @@ -296,6 +296,9 @@ def __str__(self): class ThresholdSpecification(LoaderSpecification): + """ + Instructions for how to load and apply thresholds to observed and predicted data + """ definitions: typing.List[ThresholdDefinition] = Field( description="The thresholds to apply to data" ) diff --git a/python/lib/evaluations/dmod/evaluations/util.py b/python/lib/evaluations/dmod/evaluations/util.py index 881979ced..d765461e1 100644 --- a/python/lib/evaluations/dmod/evaluations/util.py +++ b/python/lib/evaluations/dmod/evaluations/util.py @@ -603,13 +603,14 @@ def __init__( day = possible_args[0] elif len(possible_args) == 2: # We are going to interpret this as month-day + # The year doesn't matter since we are focused on a day of the year separate from the year itself day = pandas.Timestamp(year=2020, month=possible_args[0], day=possible_args[1]) elif len(possible_args) > 3: # We're going to interpret this as year-month-day. Further args may include time, but those are not # important for this day = pandas.Timestamp(year=possible_args[0], month=possible_args[1], day=possible_args[2]) else: - raise ValueError("A list of no numbers was passed; a Day cannot be interpretted.") + raise ValueError("A list of no numbers was passed; a Day cannot be interpreted.") if isinstance(day, str) and value_is_number(day): day = float(day) diff --git a/python/lib/evaluations/images/all-from-dmod.evaluations.specification.png b/python/lib/evaluations/images/all-from-dmod.evaluations.specification.png new file mode 100644 index 000000000..68a1227b2 Binary files /dev/null and b/python/lib/evaluations/images/all-from-dmod.evaluations.specification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.backend.BackendSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.backend.BackendSpecification.png new file mode 100644 index 000000000..b3194a68c Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.backend.BackendSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.data.DataSourceSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.data.DataSourceSpecification.png new file mode 100644 index 000000000..50615d92c Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.data.DataSourceSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png new file mode 100644 index 000000000..4d2b20bf6 Binary files /dev/null and 
b/python/lib/evaluations/images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.fields.AssociatedField.png b/python/lib/evaluations/images/dmod.evaluations.specification.fields.AssociatedField.png new file mode 100644 index 000000000..2e7f88541 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.fields.AssociatedField.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.fields.FieldMappingSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.fields.FieldMappingSpecification.png new file mode 100644 index 000000000..bf8d34b7c Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.fields.FieldMappingSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.fields.ValueSelector.png b/python/lib/evaluations/images/dmod.evaluations.specification.fields.ValueSelector.png new file mode 100644 index 000000000..6122c3ed0 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.fields.ValueSelector.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.locations.CrosswalkSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.locations.CrosswalkSpecification.png new file mode 100644 index 000000000..09534e6bf Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.locations.CrosswalkSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.locations.LocationSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.locations.LocationSpecification.png new file mode 100644 index 000000000..d31a586e7 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.locations.LocationSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.scoring.MetricSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.MetricSpecification.png new file mode 100644 index 000000000..ee2145bee Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.MetricSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.scoring.SchemeSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.SchemeSpecification.png new file mode 100644 index 000000000..b217f10e4 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.SchemeSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png new file mode 100644 index 000000000..7dab285f2 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdDefinition.png b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdDefinition.png new file mode 100644 index 000000000..4291704e5 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdDefinition.png differ diff 
--git a/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdSpecification.png new file mode 100644 index 000000000..a534878a9 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdSpecification.png differ diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.unit.UnitDefinition.png b/python/lib/evaluations/images/dmod.evaluations.specification.unit.UnitDefinition.png new file mode 100644 index 000000000..1240d9bd8 Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.unit.UnitDefinition.png differ diff --git a/python/lib/evaluations/update_diagrams.py b/python/lib/evaluations/update_diagrams.py new file mode 100644 index 000000000..c3f68e820 --- /dev/null +++ b/python/lib/evaluations/update_diagrams.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +""" +@TODO: Describe the application here +""" +import os +import sys +import typing + +import multiprocessing + +from pathlib import Path + +import dmod.evaluations.specification as specification + +from argparse import ArgumentParser + + +try: + import erdantic +except: + print( + "Erdantic is required in order to produce diagrams, but was not found. " + "Please install a compatible version if updated diagrams are needed.", + file=sys.stderr + ) + exit(255) + + +DIAGRAM_DIRECTORY = "./images" + + +class Arguments(object): + def __init__(self, *args): + # Replace '__option' with any of the expected arguments + self.__option: typing.Optional[str] = None + + self.__parse_command_line(*args) + + # Add a property for each argument + @property + def option(self) -> str: + return self.__option + + def __parse_command_line(self, *args): + parser = ArgumentParser("Put the description of your application here") + + # Add Arguments + parser.add_argument( + "-o", + metavar="option", + dest="option", + type=str, + default="default", + help="This is an example of an option" + ) + + # Parse the list of args if one is passed instead of args passed to the script + if args: + parameters = parser.parse_args(args) + else: + parameters = parser.parse_args() + + # Assign parsed parameters to member variables + self.__option = parameters.option + + +def main(): + """ + Define your main function here + """ + arguments = Arguments() + classes_to_diagram: typing.Sequence[typing.Type[specification.Specification]] = specification.get_specification_types(all_specifications=True) + failures: typing.List[str] = [] + + class_to_diagram: typing.Optional[typing.Type[specification.Specification]] = None + + for class_to_diagram in classes_to_diagram: + full_name = f"{class_to_diagram.__module__}.{class_to_diagram.__qualname__}" + output_path = os.path.join(DIAGRAM_DIRECTORY, f"{full_name}.png") + + try: + erdantic.draw(class_to_diagram, out=output_path, depth_limit=0) + except BaseException as exception: + message = f"Failed to draw a graph at: '{full_name}'{os.linesep} {exception}" + failures.append(message) + else: + real_path = Path(output_path) + print(f"Wrote a diagram for '{full_name}' to '{real_path.resolve()}'") + + if classes_to_diagram is not None: + diagram_for_all_path = f"all-from-dmod.evaluations.specification" + output_path = os.path.join(DIAGRAM_DIRECTORY, f"{diagram_for_all_path}.png") + + try: + erdantic.draw(*classes_to_diagram, out=output_path, depth_limit=9999, orientation=erdantic.Orientation.VERTICAL) + except BaseException 
as exception: + message = f"Failed to draw a graph at: '{diagram_for_all_path}'{os.linesep} {exception}" + failures.append(message) + else: + real_path = Path(output_path) + print(f"Wrote a diagram for '{diagram_for_all_path}' to '{real_path.resolve()}'") + + for message in failures: + print(message, file=sys.stderr) + + exit(len(failures)) + +if __name__ == "__main__": + main() diff --git a/python/lib/metrics/README.md b/python/lib/metrics/README.md index 96dd36434..c7f06d38d 100644 --- a/python/lib/metrics/README.md +++ b/python/lib/metrics/README.md @@ -1,5 +1,293 @@ -# About -Python package for utilities related to forcing, meta, and other modeling-related data for NWM MaaS. +# Distributed Model on Demand - Metrics -# Structure -Structure has been modified from original to have inner duplicate directory in order to comply with general Python packaging structure. This facilitates executing tests in a variety of different scenarios, including running integration tests on a local machine using the included test script and/or running unit tests directly within an IDE. +The Distributed Model on Demand (DMOD) metrics package is a python library dedicated exclusively +to describing available functionality and performing mathematical operations upon provided data. + +## How are metrics called? + +There lies a class at `dmod.metrics.scoring` named `ScoringScheme` that manages operations. +You first provide it a list of all metrics you intend to gather from your input data and a set of +`Communicator` objects to help distribute information generated during evaluation. Next, you can +call the `ScoringScheme` as a function with prepared pairs, where to find "observed" values within +the pairs, where to find the "predicted" values within the pairs, and thresholds. This invocation +will yield a `MetricResults` object. + +```python +import pandas +import dmod.metrics as metrics + +pairs = pandas.read_csv("path/to/pairs.csv") + +scheme = metrics.ScoringScheme([ + metrics.KlingGuptaEfficiency(3), + metrics.PearsonCorrelationCoefficient(6), +]) + +results = scheme.score(pairs=pairs, observed_value_label="observation", predicted_value_label="prediction") +``` + +This same `ScoringScheme` may be called many times over with different sets of data, usually +corresponding to different locations, while maintaining a common standard of expected results. + +```python +results2 = scheme.score(pairs=pandas.read_csv("path/to/pairs2.csv"), observed_value_label="observation", predicted_value_label="prediction") +results3 = scheme.score(pairs=pandas.read_csv("path/to/pairs3.csv"), observed_value_label="observation", predicted_value_label="prediction") +results4 = scheme.score(pairs=pandas.read_csv("path/to/pairs4.csv"), observed_value_label="observation", predicted_value_label="prediction") +``` + +## What do `MetricResults` provide? + +`dmod.metrics.MetricResults` objects provide access to individual metrics and tools for interpreting +results in different ways and making it easier to serialize results for further communication. + +## How can `MetricResults` interpret the outcome of an evaluation? + +Each `MetricResults` object contains the evaluated metrics performed on a singular set of data. +When providing the `Metric`s that are run to the `ScoringScheme`, a weight is passed along for +each metric. Passing thresholds to the invocation will provide weights for each threshold. This +provides a basis to establish a hierarchy of importance for each metric and threshold. 
For example, the `Critical Success Index` may be deemed twice as important as the results for
+the `Normalized Nash-Sutcliffe Efficiency`, and results for the "Major" threshold may be deemed
+1.5 times as important as the results for the "Moderate" threshold.
+
+These weights, along with the metadata for each metric, provide a means of scaling and grading. The
+`PearsonCorrelationCoefficient` `Metric` class, for example, stores the maximum value as `1`,
+the minimum value as `-1`, and the ideal value as `1`, with `0` marking a total failure of the predicted
+data to correlate in any fashion with the observed data (negative values aren't considered a total
+failure since they indicate some degree of negative correlation, whereas `0` indicates none
+whatsoever). For the following example, let us say that a given instance of
+`PearsonCorrelationCoefficient` with a weight of `7` has a result of `0.6734` for the "Major"
+threshold. This is interpreted as a scaled value of `4.7138`. Now let us say we have an
+instance of `ProbabilityOfFalseDetection` with a weight of `3` that has a result of `0.12`.
+This is interpreted as having a scaled value of `2.64`. If those are the only metrics considered
+for the "Major" threshold, the maximum possible value for that threshold is `10`
+(`PearsonCorrelationCoefficient` with a weight of `7` and `ProbabilityOfFalseDetection` with
+a weight of `3`). Since those results were `4.7138` and `2.64`, their total value was `7.3538`
+out of `10`, or a grade of `73.538%`. Now say the only other threshold being evaluated was
+"Moderate" with a weight of `3` and a total of `2.73`, or a grade of `91%`. The overall result
+of the combination of these two thresholds is now `7.3538` + `2.73` out of `10` + `3`, or
+`10.0838` out of `13`, with a grade of `77.568%`.
+
+## What is a `Metric` in the codebase?
+
+An instance of `dmod.metrics.scoring.Metric` is an object that may be called to provide scores
+for a collection of pairs. Examples of these
+`dmod.metrics.scoring.Metric` classes are `dmod.metrics.ProbabilityOfFalseDetection`
+and `dmod.metrics.PearsonCorrelationCoefficient`. These are constructed with a given weight, so
+an instance of `dmod.metrics.ProbabilityOfDetection` may be created with a weight of `3` and
+an instance of `dmod.metrics.KlingGuptaEfficiency` may be created with a weight of `8`.
+
+New metrics may be implemented simply by declaring a new class and implementing `get_description`,
+which allows outside code to discover what the new metric is expected to do, `get_name`, which
+provides an easy-to-identify name, and `__call__`, which is where the actual operation is
+performed.
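+
+The example relies on a handful of imports and shared names from the surrounding package. A plausible setup is
+sketched below; the exact module paths and the `NUMBER` alias are assumptions and may differ in the actual codebase:
+
+```python
+import typing
+
+import pandas
+import sklearn.metrics
+
+# Assumed locations within the dmod.metrics package; `scoring` provides Metric, Score, and Scores
+from dmod.metrics import scoring
+from dmod.metrics.threshold import Threshold  # assumed home of the Threshold type
+
+# Stand-in for the package's numeric type alias used in the constructor signature
+NUMBER = typing.Union[int, float]
+```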
+ +A very simple example would be: + +```python +class VolumeError(scoring.Metric): + @classmethod + def get_descriptions(cls): + return "The difference between the observed volume and the predicted volume" + + @classmethod + def get_name(cls) -> str: + return "Volume Error" + + def __init__(self, weight: NUMBER): + """ + Constructor + + Args: + weight: The relative significance of the metric + """ + super().__init__( + weight=weight, + ideal_value=0, + greater_is_better=False + ) + + def __call__( + self, + pairs: pandas.DataFrame, + observed_value_label: str, + predicted_value_label: str, + thresholds: typing.Sequence[Threshold] = None, + *args, + **kwargs + ) -> scoring.Scores: + scores: typing.List[scoring.Score] = list() + + for volume_threshold in thresholds: + filtered_pairs = volume_threshold(pairs) + difference = 0 + if not filtered_pairs.empty: + dates: typing.List[int] = [value.astype("int") for value in filtered_pairs.index.values] + area_under_observations = sklearn.metrics.auc(dates, filtered_pairs[observed_value_label]) + area_under_predictions = sklearn.metrics.auc(dates, filtered_pairs[predicted_value_label]) + difference = area_under_predictions - area_under_observations + scores.append(scoring.Score(self, difference, volume_threshold, sample_size=len(filtered_pairs))) + + return scoring.Scores(self, scores) +``` + +This provides everything needed to use outside logic provided by `SciKit-Learn` _and_ make the metric +easy to explore. + +## What about categorical metrics? + +`dmod.metrics` has a wide range of support for categorical metrics primarily due to their bounded nature. +This means that they naturally have well-defined information for scaling and grading. + +Implementing categorical metrics is relatively easy since they rely on truth tables. Probability of Detection, +for example, is incredibly easy to implement: + +```python +class ProbabilityOfDetection(CategoricalMetric): + @classmethod + def get_descriptions(cls): + return "The probability that something was detected. Sensitive to hits, but ignores false alarms. " \ + "Very sensitive to the climatological frequency of the event. Good for rare events." + + def _get_values(self, tables: categorical.TruthTables) -> typing.Iterable[KEY_AND_ROW]: + return tables.probability_of_detection.iterrows() + + @classmethod + def get_metadata(cls) -> categorical.CategoricalMetricMetadata: + return categorical.TruthTable.get_metric_metadata("probability_of_detection") +``` + +All this ends up doing is gathering the `probability_of_detection` function that has already been defined +on `TruthTables` collections that hold `TruthTable` objects: + +```python + @property + def probability_of_detection(self) -> pandas.DataFrame: + """ + A frame depicting the probability of detection for each truth table + """ + probabilities_of_detection = [ + { + "series_weight": self.__weight, + "threshold": table.name, + "threshold_weight": table.weight, + "value": table.probability_of_detection(), + "sample_size": len(table) + } + for table in self.__tables.values() + ] + + return pandas.DataFrame(probabilities_of_detection) +``` + +## What is meant by "Discoverability"? + +Metadata may be collected about each implemented Metric without any sort of outside code or any +needed hardcoding. 
Anything that may invoke `dmod.metrics` may be able to perform operations such as: + +```python +from pprint import pprint +import dmod.metrics as metrics + +pprint(metrics.get_metric_options()) +``` + +and see: + +```shell +[{'description': 'The probability that something was detected. Sensitive to ' + 'hits, but ignores false alarms. Very sensitive to the ' + 'climatological frequency of the event. Good for rare events.', + 'identifier': 'probabilityofdetection', + 'name': 'Probability Of Detection'}, + {'description': 'The probability that something was falsely reported as ' + 'happening. Sensitive to false alarms, but ignores misses. ' + 'Very sensitive to the climatological frequency of the event. ' + 'Should be used in conjunction with the probability of ' + 'detection.', + 'identifier': 'falsealarmratio', + 'name': 'False Alarm Ratio'}, + {'description': 'Sensitive to false alarms, but ignores misses. Can be ' + "artificially improved by issuing fewer 'yes' forecasts to " + 'reduce the number of false alarms. Not often reported for ' + 'deterministic forecasts, but is an important component of ' + 'the Relative Operating Characteristic (ROC) used widely for ' + 'probabilistic forecasts.', + 'identifier': 'probabilityoffalsedetection', + 'name': 'Probability Of False Detection'}, + ... +``` + +This provides more than enough information needed to build user interfaces or gather information +to provide further context to data. Say I receive a value for `"equitablethreatscore"`. What does +that value indicate? Well, the metadata explains that the `"equitablethreatscore"` may be +displayed as `"Equitable Threat Score"` and describes `'How well did the forecast "yes" events correspond +to the observed "yes" events (accounting for hits due to chance)? Sensitive to hits. Because it +penalises both misses and false alarms in the same way, it does not distinguish the source of +forecast error.'` + +## What is a `Communicator`? + +A `dmod.metrics.Communicator` is an event based mechanism for handling data emission events. +`Communicator`s are stored within `dmod.metrics.CommunicatorGroup`s which may handle more wide scale +communication operations. An example can be found in the `score` function of +`dmod.metrics.ScoringScheme` where each evaluation of a metric is announced: + +```python + for metric in self.__metrics: # type: Metric + self.__communicators.info(f"Calling {metric.name}", verbosity=Verbosity.LOUD, publish=True) + ... +``` + +This means that the `info` event will be triggered on each held `Communicator`, but only on those set to handle +messages of a verbosity of `LOUD` or greater, and to call the `write` event after doing so. + +Say I have three communicators: + +1. Writes errors to stderr with a verbosity of `LOUD` (operates when very little data is necessary) +2. Writes information with a verbosity of `QUIET` to a file +3. Sends `LOUD` messages through Redis channels +(see `dmod.evaluation_service.utilities.communication.RedisCommunicator`) + +Per the above example, `Communicator` 1 won't perform any operations because it only handles errors and this +was just standard information. `Communicator` 2 won't perform operations because the given message was meant to be loud +and `Communicator` 2 is meant to only handle `QUIET` data. `Communicator` 3, though, will handle the message by +transforming the information into a common format, adding it to a list in a specified Redis instance, and call the +`write` command on it. 
The `write` command will send the message to all clients listening to that redis channel +and send that transformed data to any added handlers for the `write` event. + +## How can I invoke a metric? + +There's not a lot of complexity when it comes to just calling metrics: + +```python +from pprint import pprint + +import pandas + +import dmod.metrics as metrics + +pearson = metrics.PearsonCorrelationCoefficient(5) +observation_key = "observed" +model_key = "modeled" +data = pandas.DataFrame({observation_key: [1, 2, 3, 4, 5], model_key: [2, 3, 4, 5, 6]}) +results = pearson(data, observation_key, model_key) +pprint(results.to_dict()) +``` + +which yields: + +```shell +{'grade': '100.00%', + 'scaled_value': 4.99, + 'scores': {'All': {'failed': False, + 'grade': 99.999, + 'sample_size': 5, + 'scaled_value': 0.99, + 'value': 0.99, + 'weight': 1}}, + 'total': 0.9999999999999999} +``` + +`dmod.evaluations` provides functionality that helps with more advanced operations +(such as adding thresholds and operating upon many metrics) \ No newline at end of file diff --git a/python/lib/metrics/dmod/metrics/metric.py b/python/lib/metrics/dmod/metrics/metric.py index a30643691..30a229492 100644 --- a/python/lib/metrics/dmod/metrics/metric.py +++ b/python/lib/metrics/dmod/metrics/metric.py @@ -6,8 +6,8 @@ * Forecast Verification - Issues, Methods and FAQ, Ebert, https://www.cawcr.gov.au/projects/verification/verif_web_page.html """ +from __future__ import annotations -import os import typing import abc import math @@ -42,7 +42,7 @@ def is_type(value: object, value_type: typing.Type) -> bool: """ - Determines whether or not the given value matches the given type + Determines whether the given value matches the given type This can be used to evaluated types such as unions since you cannot use `isinstance` @@ -51,7 +51,7 @@ def is_type(value: object, value_type: typing.Type) -> bool: value_type: The type to check against Returns: - Whether or not the value matches the given type + Whether the value matches the given type """ value_is_valid = False @@ -184,7 +184,7 @@ def find_truthtables_key(**kwargs) -> typing.Optional[str]: Returns: "TRUTH_TABLES" if there's a TruthTables in the kwargs, otherwise the first TruthTables present if it exists """ - # Find all TruthTables in the passed kwargs + # Find all TruthTables in the provided kwargs keys = [ key for key, value in kwargs.items() diff --git a/python/lib/metrics/dmod/metrics/scoring.py b/python/lib/metrics/dmod/metrics/scoring.py index 63aa083b7..53110114d 100644 --- a/python/lib/metrics/dmod/metrics/scoring.py +++ b/python/lib/metrics/dmod/metrics/scoring.py @@ -470,7 +470,7 @@ def __str__(self): return f"{self.name}: {self.value} out of {self.maximum_value}" -class Scores(abstract_collections.Sized, abstract_collections.Iterable): +class Scores(abstract_collections.Sequence): def __len__(self) -> int: return len(self.__results) @@ -592,7 +592,7 @@ def __repr__(self) -> str: return self.__str__() -class MetricResults(object): +class MetricResults: """ A mapping thresholds to a variety of metrics and their values