diff --git a/python/lib/evaluations/README.md b/python/lib/evaluations/README.md
index e69de29bb..475e380dc 100644
--- a/python/lib/evaluations/README.md
+++ b/python/lib/evaluations/README.md
@@ -0,0 +1 @@
+# Distributed Model on Demand - Evaluations
\ No newline at end of file
diff --git a/python/lib/evaluations/dmod/evaluations/__init__.py b/python/lib/evaluations/dmod/evaluations/__init__.py
index ad03cefaf..8b1378917 100644
--- a/python/lib/evaluations/dmod/evaluations/__init__.py
+++ b/python/lib/evaluations/dmod/evaluations/__init__.py
@@ -1,49 +1 @@
-#!/usr/bin/env python3
-import typing
-from argparse import ArgumentParser
-
-
-class Arguments(object):
- def __init__(self, *args):
- self.__option: typing.Optional[str] = None
-
- self.__parse_command_line(*args)
-
- @property
- def option(self) -> str:
- return self.__option
-
- def __parse_command_line(self, *args):
- parser = ArgumentParser("Put a description for your script here")
-
- # Add options
- parser.add_argument(
- "-o",
- metavar="option",
- dest="option",
- type=str,
- default="default",
- help="This is an example of an option"
- )
-
- # Parse the list of args if one is passed instead of args passed to the script
- if args:
- parameters = parser.parse_args(args)
- else:
- parameters = parser.parse_args()
-
- # Assign parsed parameters to member variables
- self.__option = parameters.option
-
-
-def main():
- """
- Define your initial application code here
- """
- arguments = Arguments()
-
-
-# Run the following if the script was run directly
-if __name__ == "__main__":
- main()
diff --git a/python/lib/evaluations/dmod/evaluations/specification/README.md b/python/lib/evaluations/dmod/evaluations/specification/README.md
new file mode 100644
index 000000000..f16b18cd3
--- /dev/null
+++ b/python/lib/evaluations/dmod/evaluations/specification/README.md
@@ -0,0 +1,1342 @@
+# Specification
+
+Evaluation workflows are highly configurable via the use of required evaluation specifications. There are a lot
+of different options, but this level of complexity may be mitigated through the use of [templates](#templates).
+
+## Table of Contents
+
+- [A Word on Templates](#templates)
+- [Querying Data](#querying)
+- [Evaluation Specification](#EvaluationSpecification)
+ - [Examples](#EvaluationSpecificationExamples)
+- [Data Source Specification](#DataSourceSpecification)
+ - [Examples](#DataSourceSpecificationExamples)
+- [Backend Specification](#BackendSpecification)
+ - [Examples](#BackendSpecificationExamples)
+- [Associated Field](#AssociatedField)
+ - [How to Use Paths](#AssociatedFieldPaths)
+ - [Examples](#AssociatedFieldExamples)
+- [Field Mapping Specification](#FieldMappingSpecification)
+ - [Examples](#FieldMappingSpecificationExamples)
+- [Value Selector](#ValueSelector)
+ - [How to Use Paths](#ValueSelectorPaths)
+ - [Examples](#ValueSelectorExamples)
+- [Crosswalk Specification](#CrosswalkSpecification)
+ - [Examples](#CrosswalkSpecificationExamples)
+- [Location Specification](#LocationSpecification)
+ - [Examples](#LocationSpecificationExamples)
+- [Metric Specification](#MetricSpecification)
+ - [Examples](#MetricSpecificationExamples)
+- [Threshold Specification](#ThresholdSpecification)
+ - [Examples](#ThresholdSpecificationExamples)
+- [Threshold Definition](#ThresholdDefinition)
+ - [Examples](#ThresholdDefinitionExamples)
+- [Threshold Application Rules](#ThresholdApplicationRules)
+ - [Examples](#ThresholdApplicationRulesExamples)
+- [Unit Definition](#UnitDefinition)
+ - [Examples](#UnitDefinitionExamples)
+- [Scheme Specification](#SchemeSpecification)
+  - [Examples](#SchemeSpecificationExamples)
+- [All Specification Elements](#all-elements)
+
+
+## A Word on Templates
+
+Templating in evaluation specifications is a means of using preconfigured logic within new configurations.
+Many configurations may be the same or they may be mostly the same. Configuring full or partial configurations
+and attaching a template name to a configuration will apply the template settings prior to the application of
+passed configurations.
+
+Templates are supported on any model that has the `template_name` property. To use an existing template,
+all that must be done to include it is to set the value of `template_name` to it:
+
+```json
+{
+ "observations": [
+ {
+ "template_name": "Observation Template"
+ }
+ ]
+}
+```
+
+Templates are environment specific - one environment may have an important template while another might not,
+but the templates are configurable, so more and more may be created as new use cases arise. Template Manager
+constructs (such as the [FileTemplateManager](template.py)) provide all the means necessary to find out what templates are
+available. Services providing access to evaluations should provide querying capabilities so that templates may be
+reused as much as possible.
+
+
+## Querying Data
+
+Structured data, such as with JSON, may be queried with the help of [JSONPath](https://goessner.net/articles/JsonPath/),
+a query language used as an analog to [XPaths](https://en.wikipedia.org/wiki/XPath).
+
+Given a document like:
+
+```json
+{
+ "a": 5,
+ "b": [
+ {
+ "value1": 1,
+ "value2": {
+ "value3": "This is another value"
+ },
+ "value3": false
+ },
+ {
+ "value1": 2,
+ "value2": {
+ "value3": "This is yet another value that we'll use as an example"
+ },
+ "value3": true
+ },
+ {
+ "value1": 47,
+ "value2": {
+ "value3": "Look at this awesome value!"
+ },
+ "value3": true
+ }
+ ]
+}
+```
+
+the query `"$.b[1].value2.value3"` will yield '"This is yet another value that we'll use as an example"'. The
+`$` character instructs the search operations to start looking at the root of the document. The next instruction,
+`b` tells the search operation to look for values under `b`. `[1]` tells the operation to then look in the
+second member of the collection held under `b`. `value2` tells the search process to _then_ search under the `value2`
+object where the final `value3` instruction retrieves the value belonging to `value3`.
+
+The equivalent hardcoded instructions in python would be:
+
+```python
+example = {
+ "a": 5,
+ "b": [
+ {
+ "value1": 1,
+ "value2": {
+ "value3": "This is another value"
+ },
+ "value3": False
+ },
+ {
+ "value1": 2,
+ "value2": {
+ "value3": "This is yet another value that we'll use as an example"
+ },
+ "value3": True
+ },
+ {
+ "value1": 47,
+ "value2": {
+ "value3": "Look at this awesome value!"
+ },
+ "value3": True
+ }
+ ]
+}
+
+equivalent_value = example['b'][1]['value2']['value3']
+print(equivalent_value)
+# Output: This is yet another value that we'll use as an example
+```
+
+Queries don't have to start at the root, but it _is_ advised. A query such as `"value1"` would yield `[1, 2, 47]`, but
+a query of `"value3"` would yield `["This is another value", false, "This is yet another value that we'll use as an
+example", true, "Look at this awesome value!", true]`.
+
+Investigate [Associated Fields](#AssociatedField) and [Value Selectors](#ValueSelector) to see how paths are used in
+practice.
+
+
+## Evaluation Specification
+
+![Instructions for how different aspect of an evaluation should work](../../../images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png)
+
+The [Evaluation Specification](evaluation.py) is the primary unit of configuration required in order to carry
+out evaluations. This element contains every bit of needed information, from where to load what data to what
+metrics to run on it. Each evaluation will require one and only one evaluation specification.
+
+
+## Data Source Specification
+
+![Specification for where to get the actual data for evaluation](../../../images/dmod.evaluations.specification.data.DataSourceSpecification.png)
+
+The [Data Source Specification](data.py) is the block of configuration responsible for loading a set of data
+that will be joined with others for evaluation. Loading data by using instructions provided by a `DataSourceSpecification`
+will yield a Data Frame that will be ready for manipulation.
+
+The most important aspects defined by a `DataSourceSpecification` are:
+
+1. What fields to load
+2. What the data is [measured in or how to find out](#unit-definition)
+3. What locations are represented within the data or how to find out
+4. How to load and interpret the raw data
+5. What field within the resultant data should be used for all calculations.
+
+
+## Backend Specification
+
+![A specification for how data should be loaded](../../../images/dmod.evaluations.specification.backend.BackendSpecification.png)
+
+[Backend Specifications](backend.py) dictate how data is loaded. As of writing, there are two different
+backend types: files and `REST`. Unlike a lot of the other configuration types, the `properties` attribute may be
+truly important in that some readers require extra information. `REST` calls may require extra parameters.
+For instance, reaching out to NWIS for instantaneous streamflow data may require information such as a
+comma-delimited list of sites to retrieve data for, a `startDT` and `endDT` to indicate the timeframe of the data to
+retrieve, and a `parameterCd`, which dictates what data to pull back (`00060` would be streamflow).
+
+Expect these parameters to be more important for services than for local files due to the added complexity of
+querying.
+
+
+### Examples
+Load a local RDB file stored at "resources/nwis_stat_thresholds.rdb":
+```json
+{
+ "backend_type": "file",
+ "format": "rdb",
+    "address": "resources/nwis_stat_thresholds.rdb"
+}
+```
+
+Retrieve streamflow data from NWIS' Instantaneous Values service for locations "0214657975" and
+"0214655255", with values ocurring between midnight 2022-12-01 and midnight 2022-12-31:
+```json
+{
+ "backend_type": "rest",
+ "format": "json",
+ "address": "https://nwis.waterservices.usgs.gov/nwis/iv",
+ "params": {
+ "format": "json",
+ "indent": "on",
+ "sites": "0214657975,0214655255",
+ "startDT": "2022-12-01T00:00%2b0000",
+ "endDT": "2022-12-31T00:00%2b0000",
+ "parameterCd": "00060"
+ }
+}
+```
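+
+For context, that configuration boils down to a fairly ordinary HTTP request. The sketch below issues roughly the
+same query directly with the third-party `requests` package; it is only an illustration of the query, not the
+evaluation library's own loading code. Note that `requests` URL-encodes parameter values itself, so the timestamps
+are passed with a literal `+` rather than the pre-encoded `%2b` seen above.
+
+```python
+import requests
+
+response = requests.get(
+    "https://nwis.waterservices.usgs.gov/nwis/iv",
+    params={
+        "format": "json",
+        "sites": "0214657975,0214655255",
+        "startDT": "2022-12-01T00:00+0000",
+        "endDT": "2022-12-31T00:00+0000",
+        "parameterCd": "00060"
+    }
+)
+
+# The JSON document that a value selector would then pick fields out of
+document = response.json()
+```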
+
+Use the "Instantaneous NWIS Streamflow" template to retrieve streamflow data from location "0214657975"
+from between midnight 2023-09-01 and midnight 2023-09-14:
+```json
+{
+ "template_name": "Instantaneous NWIS Streamflow",
+ "params": {
+ "sites": "0214657975",
+ "startDT": "2023-09-01T00:00%2b0000",
+ "endDT": "2023-09-14T00:00%2b0000"
+ }
+}
+```
+
+Retrieve data "path/to/file.json" in the style handled by the "JSON File" template
+```json
+{
+ "template_name": "JSON File",
+ "address": "path/to/file.json"
+}
+```
+
+
+## Associated Field
+
+![A specification for additional data that should accompany selected data](../../../images/dmod.evaluations.specification.fields.AssociatedField.png)
+
+Configuring [Associated Fields](fields.py) helps dictate what data should be placed with the selected values.
+Retrieving data via a [Value Selector](#ValueSelector) might return a series of values, but may not contain the
+collection of values necessary for context. For example, selecting values in NWIS JSON may be performed by gathering
+data from `"values[*].value[*].value"`, but that doesn't tell you _when_ those values occurred. If you associate
+those values with the date times from `"values[*].value[*].dateTime"`, however, you'll now have a set of data containing
+values _and_ context.
+
+
+### Examples
+
+Use data at `sourceInfo.siteCode[0].value`, starting from the current origin, as the accompanying location
+for the currently identified measurement
+```json
+{
+ "name":"observation_location",
+ "path": ["sourceInfo", "siteCode", "[0]", "value"],
+ "datatype": "string"
+}
+```
+
+Consider the adjacent `date` field as a `datetime` object for read measurements
+```json
+{
+ "name": "date",
+ "datatype": "datetime"
+}
+```
+
+
+## Field Mapping Specification
+
+![Details on how a field should be aliased](../../../images/dmod.evaluations.specification.fields.FieldMappingSpecification.png)
+
+Field mapping allows the renaming of fields that will appear during evaluation and in outputs. Some sources provide
+hard-to-understand or uncommon names. For instance, some inputs will have a variable with the name `Q_out` that
+will need to be used alongside data named `streamflow` or `discharge`. Renaming fields allows for common terms
+across many different types of evaluations and makes downstream tooling easier.
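+
+Conceptually, a column-based field mapping behaves like a column rename. The pandas sketch below is only an analogy
+for what the mapping accomplishes; it is not how the library applies the mapping internally.
+
+```python
+import pandas
+
+frame = pandas.DataFrame({
+    "Q_out": [322.0, 14.0],
+    "date": ["2021-10-14 00:00", "2021-10-14 01:00"]
+})
+
+# Renaming "Q_out" to "streamflow" lets downstream logic refer to one common field name
+frame = frame.rename(columns={"Q_out": "streamflow"})
+print(list(frame.columns))
+# ['streamflow', 'date']
+```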
+
+
+### Examples
+
+Rename the "date" field as "value_date" for processing
+```json
+{
+ "field": "value_date",
+ "map_type": "column",
+ "value": "date"
+}
+```
+
+Consider the value of "site_no" as the field named "location"
+```json
+{
+ "field": "location",
+ "map_type": "value",
+ "value": "site_no"
+}
+```
+
+
+## Value Selector
+
+![Instructions for how to retrieve values from a data source](../../../images/dmod.evaluations.specification.fields.ValueSelector.png)
+
+[Value Selectors](fields.py) are the most important part of any data source configuration: they define what fields
+should be loaded and how. Data that has been loaded from a backend may provide raw values, but it is the job
+of Value Selectors to investigate that data and extract what to use. Say a CSV file is used and it contains fields like
+`d`, `flow`, `temperature`, `dew point`, `age`, `altitude`, `loc`, `turbidity`, `region`, `state`, `operator`, and
+`stage`, that looks like:
+
+| d                | flow | temperature | dew point | age | altitude | loc               | turbidity | region | state | operator | stage |
+|------------------|------|-------------|-----------|-----|----------|-------------------|-----------|--------|-------|----------|-------|
+| 2023/10/01 00:00 | 37.8 | 25 | | 45 | 1054.24 | SOME LOCATION, AL | `null` | SERFC | AL | USACE | 14 |
+| 2023/10/01 01:00 | 37.6 | 24.8 | | 55 | 1054.24 | SOME LOCATION, AL | `null` | SERFC | AL | USACE | 13 |
+
+
+A configuration like:
+
+```json
+{
+ "name": "streamflow",
+ "where": "column",
+ "path": "flow",
+ "datatype": "float",
+ "associated_fields": [
+ {
+ "name": "value_date",
+ "path": "d",
+ "datatype": "datetime"
+ },
+ {
+ "name": "location",
+ "path": "loc",
+      "datatype": "string"
+ }
+ ]
+}
+```
+
+will transform that into:
+
+| value_date | streamflow | location |
+|--------------------------|------------|-------------------|
+| 2023-10-01 00:00:00-0000 | 37.8 | SOME LOCATION, AL |
+| 2023-10-01 01:00:00-0000 | 37.6 | SOME LOCATION, AL |
+
+
+### Examples
+
+Use each value located at `"values[*].value[*].value"`, starting from every node found at
+`"$.value.timeSeries[*]"`, as a floating point number for a field named `observation`. When selecting that value,
+also select `"values[*].value[*].dateTime"` as a `datetime` field named `"value_date"`, `"sourceInfo.siteCode[0].value"`
+as a string for a field named `"observation_location"`, and `"variable.unit.unitCode"` as a string field named `"unit"`.
+
+```json
+{
+ "name": "observation",
+ "where": "value",
+ "path": ["values[*]", "value[*]", "value"],
+ "datatype": "float",
+ "origin": ["$", "value", "timeSeries[*]"],
+ "associated_fields": [
+ {
+ "name":"value_date",
+ "path": ["values[*]", "value[*]", "dateTime"],
+ "datatype": "datetime"
+ },
+ {
+ "name":"observation_location",
+ "path": ["sourceInfo", "siteCode", "[0]", "value"],
+ "datatype": "string"
+ },
+ {
+ "name":"unit",
+ "path": ["variable", "unit", "unitCode"],
+ "datatype": "string"
+ }
+ ]
+}
+```
+
+This will select values that might look like:
+
+| observation | value_date | observation_location | unit |
+|-------------|-------------------------------|----------------------|-------|
+| 46.9 | 2015-11-30T20:00:00.000-05:00 | 0214655255 | ft3/s |
+| 50.2 | 2015-11-30T20:05:00.000-05:00 | 0214655255 | ft3/s |
+| 48.2 | 2015-11-30T20:10:00.000-05:00 | 0214655255 | ft3/s |
+
+The following might yield the same result:
+```json
+{
+ "name": "observation",
+ "where": "value",
+ "path": ["values[*]", "value[*]", "value"],
+ "datatype": "float",
+ "origin": ["$", "value", "timeSeries[*]"],
+ "associated_fields": [
+ {
+ "template_name": "NWIS Value Date"
+ },
+ {
+ "template_name": "NWIS Observation Location"
+ },
+ {
+ "template_name": "NWIS Unit"
+ }
+ ]
+}
+```
+
+Use the column named "predicted" and match it with the adjacent column named "date":
+```json
+{
+ "name": "predicted",
+ "where": "column",
+ "associated_fields": [
+ {
+ "name": "date",
+ "datatype": "datetime"
+ }
+ ]
+}
+```
+
+
+## Crosswalk Specification
+
+![Specifies how locations in the observations should be linked to locations in the predictions](../../../images/dmod.evaluations.specification.locations.CrosswalkSpecification.png)
+
+[Crosswalk Specifications](locations.py) inform the system of how it should link observations to predictions based
+on location by loading data that maps one dataset's location identifiers to the other's.
+
+Say you have the following two data sets:
+
+**Predictions**
+
+| loc | value | unit | time |
+|-----|-------|--------|-------------------|
+| 1 | 324 | ft3/s | 2021-10-14 00:00 |
+| 1 | 322 | ft3/s | 2021-10-14 01:00 |
+| 2 | 14 | ft3/s | 2021-10-14 00:00 |
+| 2 | 13 | ft3/s | 2021-10-14 01:00 |
+
+**Observations**
+
+| site_no | measurement | unit | stage | valid_time |
+|------------|-------------|-------|-------|------------------|
+| 0446846846 | 14 | cms | 8 | 2021-10-14 00:00 |
+| 0446846846 | 18 | cms | 9 | 2021-10-14 01:00 |
+| 668465168 | 7 | cms | 5 | 2021-10-14 00:00 |
+| 668465168 | 6 | cms | 4.8 | 2021-10-14 01:00 |
+
+
+A Crosswalk Specification like:
+```json
+{
+ "backend": {
+ "backend_type": "file",
+ "address": "resources/crosswalk.json",
+ "format": "json"
+ },
+ "observation_field_name": "site_no",
+ "prediction_field_name": "loc",
+ "field": {
+ "name": "loc",
+ "where": "key",
+ "path": ["* where site_no"],
+ "origin": "$",
+ "datatype": "string",
+ "associated_fields": [
+ {
+ "name": "site_no",
+ "path": "site_no",
+ "datatype": "string"
+ }
+ ]
+ }
+}
+```
+
+will read the data from `"resources/crosswalk.json"` that looks like:
+
+```json
+{
+ "1": {
+ "value1": 1,
+ "site_no": "668465168"
+ },
+ "2": {
+ "value1": 2,
+ "site_no": "0446846846"
+ },
+ "3": {
+ "value1": 3
+ },
+ "4": {
+ "value1": 4
+ }
+}
+```
+
+And determine that rows from the **Prediction** dataset with a `loc` value of `1` should link to rows from the
+**Observation** dataset with a `site_no` value of `668465168`. The keys `"3"` and `"4"` will be totally ignored since
+the path `* where site_no` means "everything that has a member named `site_no`", and `"3"` and `"4"` lack that member.
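+
+The filtering performed by `* where site_no` can be pictured as a plain dictionary comprehension. The sketch below
+only illustrates which entries survive and how they pair up; it is not how the crosswalk reader is implemented.
+
+```python
+crosswalk = {
+    "1": {"value1": 1, "site_no": "668465168"},
+    "2": {"value1": 2, "site_no": "0446846846"},
+    "3": {"value1": 3},
+    "4": {"value1": 4}
+}
+
+# Keep only the entries that have a "site_no" member, pairing each prediction
+# key with the observation location it maps to
+links = {key: entry["site_no"] for key, entry in crosswalk.items() if "site_no" in entry}
+print(links)
+# {'1': '668465168', '2': '0446846846'}
+```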
+
+
+### Examples
+
+Load the local `JSON` file at "resources/crosswalk.json" and extract the keys found at `"* where site_no"`
+(everything that has a `site_no` field) to use as a "prediction_location" and use its contained value
+`"site_no"` as a field named "observation_location":
+```json
+{
+ "backend": {
+ "backend_type": "file",
+ "address": "resources/crosswalk.json",
+ "format": "json"
+ },
+ "observation_field_name": "observation_location",
+ "prediction_field_name": "prediction_location",
+ "field": {
+ "name": "prediction_location",
+ "where": "key",
+ "path": ["* where site_no"],
+ "origin": "$",
+ "datatype": "string",
+ "associated_fields": [
+ {
+ "name": "observation_location",
+ "path": "site_no",
+ "datatype": "string"
+ }
+ ]
+ }
+}
+```
+
+Using templates, this may be represented as:
+```json
+{
+ "backend": {
+ "template_name": "JSON File",
+ "address": "resources/crosswalk.json"
+ },
+ "observation_field_name": "observation_location",
+ "prediction_field_name": "prediction_location",
+ "field": {
+ "template_name": "Prediction Key to Observed Site Crosswalk"
+ }
+}
+```
+
+This might yield something that looks like:
+
+| observation_location | prediction_location |
+|----------------------|---------------------|
+| 0214655255 | cat-52 |
+| 02146562 | cat-67 |
+| 0718735243 | cat-27 |
+
+The following JSON will instruct evaluations to pair observed data to predicted data where the observation's
+`observation_location` field matches the indicated value in the prediction's `prediction_location` field:
+
+```json
+{
+ "observation_field_name": "observation_location",
+ "prediction_field_name": "prediction_location"
+}
+```
+
+
+## Location Specification
+
+![A specification for where location data should be found](../../../images/dmod.evaluations.specification.locations.LocationSpecification.png)
+
+[Location Specifications](locations.py) define where to find identifiers for locations, whether inside or outside of
+the loaded data. Some data may have locations in columns, while other data may only indicate locations in filenames.
+
+
+### Examples
+
+Identify locations as those being from the `site_no` column:
+```json
+{
+ "identify": true,
+ "from_field": "site_no"
+}
+```
+
+Identify location names like `cat-27` and `cat-52` based on the names of files
+such as `cat-27.csv` and `cat-52_cms.csv`:
+```json
+{
+ "identify": true,
+ "from_field": "filename",
+ "pattern": "cat-\\d\\d"
+}
+```
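+
+The `pattern` above is an ordinary regular expression applied to the file name. The sketch below shows the sort of
+match it produces; the matched text is what the configuration uses as the location name.
+
+```python
+import re
+
+for filename in ("cat-27.csv", "cat-52_cms.csv"):
+    match = re.search(r"cat-\d\d", filename)
+    print(filename, "->", match.group() if match else None)
+
+# cat-27.csv -> cat-27
+# cat-52_cms.csv -> cat-52
+```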
+
+
+
+## Threshold Definition
+
+![A definition of a single threshold, where it comes from, and its significance](../../../images/dmod.evaluations.specification.threshold.ThresholdDefinition.png)
+
+[Threshold Definitions](threshold.py) define what thresholds to apply to data, where to get their values,
+how to measure them, what to call them, and how important they are. Weights in these threshold definitions operate
+the same way as the weights for [Metric Specifications](#MetricSpecification), except values are relative to other
+threshold definitions, not metrics.
+
+The name of a threshold is optional, but naming thresholds makes their meaning clearer. For instance, the `p75_va`
+field in NWIS Statistical Thresholds represents the `75th Percentile`. Someone familiar with the dataset
+may understand that raw definition, but someone _not_ familiar with these thresholds won't understand what it means.
+
+
+### Examples
+
+Use a threshold named `75th Percentile` with values from the `p75_va` field measured in `ft^3/s` with a weight of 10.
+```json
+{
+ "name": "75th Percentile",
+ "field": "p75_va",
+ "weight": 10,
+ "unit": {
+ "value": "ft^3/s"
+ }
+}
+```
+
+
+## Threshold Application Rules
+
+![Added rules for how thresholds should be applied](../../../images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png)
+
+Thresholds and the data to evaluate may not always be in the same form or scale. USGS NWIS Statistical Thresholds,
+for example, are daily values, with each day defined by `day_nu` and `month_nu` integer values. Predicted or
+observed values, though, have their temporal values defined via year, month, day, hours, and seconds. In order
+to link the correct thresholds to the correct values, [Threshold Application Rules](threshold.py) may be used to apply
+transformations on different fields to ensure that thresholds may be correctly applied to evaluation data.
+
+
+### Examples
+
+Link thresholds to observation data by creating two new columns: one named `threshold_day` in the threshold data,
+created by converting the `month_nu` and `day_nu` integer fields into a single `Day` field, and another named
+`threshold_day` in the observation data, created by converting the `value_date` field into a `Day` field.
+```json
+{
+ "name": "Date to Day",
+ "threshold_field": {
+ "name": "threshold_day",
+ "path": [
+ "month_nu",
+ "day_nu"
+ ],
+ "datatype": "Day"
+ },
+ "observation_field": {
+ "name": "threshold_day",
+ "path": [
+ "value_date"
+ ],
+ "datatype": "Day"
+ }
+}
+```
+
+`Day` is a `dmod.evaluations` specific type that describes a single day across any years, including leap years.
+If I have a day of `10/1` and data for the years `2016`, `2017`, and `2018`, that `10/1` will be equivalent to
+`2016-10-01`, `2017-10-01`, and `2018-10-01`.
+
+A `Day` may be defined by passing a numerical day of the year, a string date, a python date type, a pandas date type,
+a numpy date type, or a series of numbers to use as input values representing `[day of year]`, `[month, day]`, or
+`[year, month, day]`. `[1]`, `[1, 1]`, and `[1975, 1, 1]` will all create the same `Day` value.
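+
+As a rough sketch of those equivalences, assuming `Day` is imported from `dmod.evaluations.util` and that its
+constructor accepts a list of numbers directly (the real signature may differ slightly):
+
+```python
+from dmod.evaluations.util import Day
+
+# All three of these are intended to describe January 1st; the year in the last
+# form is ignored because a Day is independent of any particular year
+day_from_day_of_year = Day([1])
+day_from_month_and_day = Day([1, 1])
+day_from_full_date = Day([1975, 1, 1])
+```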
+
+
+## Threshold Specification
+
+![Instructions for how to load and apply thresholds to observed and predicted data](../../../images/dmod.evaluations.specification.threshold.ThresholdSpecification.png)
+
+[Threshold Specifications](threshold.py) define how threshold data should be loaded, how to apply thresholds to data,
+and what thresholds to use.
+
+
+### Examples
+
+The following two examples load an RDB file named `resources/nwis_stat_thresholds.rdb`,
+name locations based on the identified `site_no` column, and match loaded thresholds to observations via two new columns: the first
+being `threshold_day` on the threshold data, created by converting the `month_nu` and `day_nu` fields into `Day` objects,
+the second being `threshold_day` on the observation data, created by converting the `value_date` field into
+`Day` objects. The `p75_va`, `p80_va`, and `p50_va` fields are used as thresholds named `"75th Percentile"`,
+`"80th Percentile"`, and `"Median"`, respectively, measured in `ft^3/s` and weighted `10`, `5`, and `1`, respectively.
+The `75th Percentile` threshold will be considered 10 times more important than the `Median`, while the
+`80th Percentile` will be considered half as important as the `75th Percentile`, but 5 times as important as the
+`Median`.
+
+```json
+{
+ "name": "NWIS Stat Percentiles",
+ "backend": {
+ "backend_type": "file",
+ "format": "rdb",
+ "address": "resources/nwis_stat_thresholds.rdb"
+ },
+ "locations": {
+ "identify": true,
+ "from_field": "column",
+ "pattern": "site_no"
+ },
+ "application_rules": {
+ "threshold_field": {
+ "name": "threshold_day",
+ "path": [
+ "month_nu",
+ "day_nu"
+ ],
+ "datatype": "Day"
+ },
+ "observation_field": {
+ "name": "threshold_day",
+ "path": [
+ "value_date"
+ ],
+ "datatype": "Day"
+ }
+ },
+ "definitions": [
+ {
+ "name": "75th Percentile",
+ "field": "p75_va",
+ "weight": 10,
+ "unit": {
+ "value": "ft^3/s"
+ }
+ },
+ {
+ "name": "80th Percentile",
+ "field": "p80_va",
+ "weight": 5,
+ "unit": {
+ "value": "ft^3/s"
+ }
+ },
+ {
+ "name": "Median",
+ "field": "p50_va",
+ "weight": 1,
+ "unit": {
+ "value": "ft^3/s"
+ }
+ }
+ ]
+}
+```
+
+The following example can produce the exact same results as the example from above but through the use of templates.
+
+```json
+{
+ "backend": {
+ "template_name": "NWIS Stat Thresholds"
+ },
+ "locations": {
+ "template_name": "Site Number Column"
+ },
+ "application_rules": {
+ "template_name": "Date to Day"
+ },
+ "definitions": [
+ {
+ "template_name": "75th Percentile"
+ },
+ {
+ "template_name": "80th Percentile"
+ },
+ {
+ "template_name": "Median"
+ }
+ ]
+}
+```
+
+
+## Unit Definition
+
+![A definition of what a measurement unit is or where to find it](../../../images/dmod.evaluations.specification.unit.UnitDefinition.png)
+
+[Unit Definitions](unit.py) tell the system how to interpret the values from data sources. Predictions, for instance,
+may be expressed in cubic meters per second (`cms`), while observations may be expressed in cubic _feet_ per second
+(`ft^3/s`). Values in `cms` and `ft^3/s` aren't immediately comparable, so the units need to be explicitly stated in
+case of a necessary set of unit conversions. There are three options for how to get the unit. If the `value` of `cms`
+is dictated, the system will interpret all primary values from the datasource as being measured in `cms`. If the
+`field` of `unit` is dictated, the unit will be interpreted as whatever lies within the `unit` field of the
+selected data. The former option is great for cases where the unit isn't in the dataset and is instead known via
+institutional knowledge, while the latter is great for cases where the unit _is_ in the dataset.
+
+Any type of unit may be used, but unit conversions will occur if units are not the same. When these conversions take place,
+only the stock units available in [Pint](https://github.com/hgrecco/pint/blob/master/pint/default_en.txt), along with
+m3, ft3, cms, cfs, and KCFS (case insensitive), may be used.
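+
+When a conversion does need to happen, it comes down to standard unit arithmetic. The sketch below uses Pint directly
+just to show the relationship between `ft^3/s` and `m^3/s`; it is not the evaluation library's conversion code.
+
+```python
+import pint
+
+registry = pint.UnitRegistry()
+
+# One cubic foot per second expressed in cubic meters per second
+flow = 1.0 * registry.foot ** 3 / registry.second
+print(flow.to("meter ** 3 / second"))
+# roughly 0.0283 meter ** 3 / second
+```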
+
+
+### Examples
+
+Use the values in the `unit` field as the name of the measurement unit:
+```json
+{
+ "field": "unit"
+}
+```
+
+Use the value `"ft^3/s"` as the unit of measurement for every piece of data loaded in this context:
+```json
+{
+ "value": "ft^3/s"
+}
+```
+
+
+
+## Metric Specification
+
+![The definition for what metric should be used and how important it should be](../../../images/dmod.evaluations.specification.scoring.MetricSpecification.png)
+
+[Metric Specifications](scoring.py) merely define what metric is intended to be used and how important it is. The
+`weight` value only bears significance relative to _other_ defined `weight` values. If only one Metric Specification is
+defined, the `weight` doesn't have much value since there isn't anything to compare it to. It's similar in the case
+where _all_ defined Metric Specifications have the same weight - in that case the results are all equally important.
+
+The weight becomes significant when there are varying weight values. Given Metric Specifications with weights `1`,
+`2`, `3`, and `4`, the last Metric Specification is considered the most important metric while the first is
+the least important. When scores are averaged, that last metric will have a far greater impact on the results than the
+first metric.
+
+Since the values are all relative, changing the above weights to `4`, `8`, `12`, and `16` will bear the same results.
+
+There are no rules for how `weight` values are defined. They may be arbitrarily high or arbitrarily low. What matters
+is how they relate to one another.
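+
+Because only the ratios matter, normalizing each set of weights shows that the two sets above describe the same
+relative importance. This is plain arithmetic rather than library code.
+
+```python
+first = [1, 2, 3, 4]
+second = [4, 8, 12, 16]
+
+# Each metric's share of the total weight is identical between the two sets
+print([weight / sum(first) for weight in first])    # [0.1, 0.2, 0.3, 0.4]
+print([weight / sum(second) for weight in second])  # [0.1, 0.2, 0.3, 0.4]
+```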
+
+
+### Examples
+
+Use the metric named "Pearson Correlation Coefficient" with a relative weight of `10`
+```json
+{
+ "name": "Pearson Correlation Coefficient",
+ "weight": 10
+}
+```
+
+Use the metric named "pRoBabIliTyOfDeTecTiOn" with a relative weight of `4`
+```json
+{
+ "name": "pRoBabIliTyOfDeTecTiOn",
+ "weight": 4
+}
+```
+
+Using the above two examples at the same time will tell the evaluation that the result of the
+"Pearson Correlation Coefficient" is 2.5 times as important as the result of "Probability of Detection".
+
+
+## Scheme Specification
+
+![Instructions for how metrics should be applied and how to interpret them](../../../images/dmod.evaluations.specification.scoring.SchemeSpecification.png)
+
+A [Scheme Specification](scoring.py) defines the overall scoring scheme for the entire evaluation. It dictates what
+metrics to use and how important they are in comparison.
+
+
+### Examples
+
+Use the metrics "Pearson Correlation Coefficient", "Normalized Nash-Sutcliffe Efficiency",
+"Kling-Gupta Efficiency", "Probability of Detection", and "False Alarm Ratio", but consider "Pearson Correlation Coefficient"
+as the most important metric, followed by "Normalized Nash-Sutcliffe Efficiency" and "Kling-Gupta Efficiency", then followed by
+"False Alarm Ratio" and "Probability of Detection".
+
+```json
+{
+ "metrics": [
+ {
+ "name": "False Alarm Ratio",
+ "weight": 10
+ },
+ {
+ "name": "Probability of Detection",
+ "weight": 10
+ },
+ {
+ "name": "Kling-Gupta Efficiency",
+ "weight": 15
+ },
+ {
+ "name": "Normalized Nash-Sutcliffe Efficiency",
+ "weight": 15
+ },
+ {
+ "name": "Pearson Correlation Coefficient",
+ "weight": 18
+ }
+ ]
+}
+```
+
+
+### Data Source Specification Examples
+The following examples all describe the exact same datasource.
+
+The primary field for this data source will be called `observation`, which will be populated by retrieving all values
+from `values[*].value[*].value` relative to `$.value.timeSeries[*]` and interpreting them as `float`s. Each of these
+`observation` values will be accompanied by their corresponding `dateTime` found at `values[*].value[*].dateTime`,
+the location identifier at `sourceInfo.siteCode[0].value`, and the unit of measurement at `variable.unit.unitCode`,
+all relative to `$.value.timeSeries[*]`.
+
+The data will be loaded from a JSON file at "resources/observations.json".
+
+The locations are found in the `observation_location` field and the measurement unit is found in the `unit`
+field.
+
+The data will be paired and evaluated by a matching `value_date` value.
+
+A full configuration using no templates
+```json
+{
+ "value_field": "observation",
+ "value_selectors": [
+ {
+ "name": "observation",
+ "where": "value",
+ "path": ["values[*]", "value[*]", "value"],
+ "datatype": "float",
+ "origin": ["$", "value", "timeSeries[*]"],
+ "associated_fields": [
+ {
+ "name":"value_date",
+ "path": ["values[*]", "value[*]", "dateTime"],
+ "datatype": "datetime"
+ },
+ {
+ "name":"observation_location",
+ "path": ["sourceInfo", "siteCode", "[0]", "value"],
+ "datatype": "string"
+ },
+ {
+ "name":"unit",
+ "path": ["variable", "unit", "unitCode"],
+ "datatype": "string"
+ }
+ ]
+ }
+ ],
+ "backend": {
+ "backend_type": "file",
+ "format": "json",
+ "address": "resources/observations.json"
+ },
+ "locations": {
+ "identify": true,
+ "from_field": "observation_location"
+ },
+ "unit": {
+ "field": "unit"
+ },
+ "x_axis": "value_date"
+}
+```
+
+This configuration may be simplified by utilizing a few templates. The first template used, `JSON File`, simplifies
+the definition for how to load JSON files by only requiring a path to the file. The second template used,
+`From Observation`, lets you bypass the configuration for a location by just using the common declaration.
+
+```json
+{
+ "value_field": "observation",
+ "value_selectors": [
+ {
+ "name": "observation",
+ "where": "value",
+ "path": ["values[*]", "value[*]", "value"],
+ "datatype": "float",
+ "origin": ["$", "value", "timeSeries[*]"],
+ "associated_fields": [
+ {
+ "name":"value_date",
+ "path": ["values[*]", "value[*]", "dateTime"],
+ "datatype": "datetime"
+ },
+ {
+ "name":"observation_location",
+ "path": ["sourceInfo", "siteCode", "[0]", "value"],
+ "datatype": "string"
+ },
+ {
+ "name":"unit",
+ "path": ["variable", "unit", "unitCode"],
+ "datatype": "string"
+ }
+ ]
+ }
+ ],
+ "backend": {
+ "template_name": "JSON File",
+ "address": "resources/observations.json"
+ },
+ "locations": {
+ "template_name": "From Observation"
+ },
+ "unit": {
+ "field": "unit"
+ },
+ "x_axis": "value_date"
+}
+```
+
+Templates may be used for just about every field. The most complicated aspect of the above configuration is
+parsing NWIS JSON WaterML responses. All of that may be bypassed by instead using the `NWIS Record`
+template, which will supply the full and correct queries for every field needed when using that format. When that
+template is available, no user should need to define any of that themselves.
+
+```json
+{
+ "value_field": "observation",
+ "value_selectors": [
+ {
+ "template_name": "NWIS Record"
+ }
+ ],
+ "backend": {
+ "template_name": "JSON File",
+ "address": "resources/observations.json"
+ },
+ "locations": {
+ "template_name": "From Observation"
+ },
+ "unit": {
+ "field": "unit"
+ },
+ "x_axis": "value_date"
+}
+```
+
+
+### Evaluation Specification Examples
+
+The following examples all describe the exact same evaluation:
+
+A full configuration using no templates
+```json
+{
+ "observations": [
+ {
+ "name": "Observations",
+ "value_field": "observation",
+ "value_selectors": [
+ {
+ "name": "observation",
+ "where": "value",
+ "path": ["values[*]", "value[*]", "value"],
+ "datatype": "float",
+ "origin": ["$", "value", "timeSeries[*]"],
+ "associated_fields": [
+ {
+ "name":"value_date",
+ "path": ["values[*]", "value[*]", "dateTime"],
+ "datatype": "datetime"
+ },
+ {
+ "name":"observation_location",
+ "path": ["sourceInfo", "siteCode", "[0]", "value"],
+ "datatype": "string"
+ },
+ {
+ "name":"unit",
+ "path": ["variable", "unit", "unitCode"],
+ "datatype": "string"
+ }
+ ]
+ }
+ ],
+ "backend": {
+ "backend_type": "file",
+ "format": "json",
+ "address": "resources/observations.json"
+ },
+ "locations": {
+ "identify": true,
+ "from_field": "value"
+ },
+ "unit": {
+ "field": "unit"
+ },
+ "x_axis": "value_date"
+ }
+ ],
+ "predictions": [
+ {
+ "name": "Predictions",
+ "value_field": "prediction",
+ "value_selectors": [
+ {
+ "name": "predicted",
+ "where": "column",
+ "associated_fields": [
+ {
+ "name": "date",
+ "datatype": "datetime"
+ }
+ ]
+ }
+ ],
+ "backend": {
+ "backend_type": "file",
+ "format": "csv",
+ "address": "resources/cat.*cfs.csv",
+ "parse_dates": ["date"]
+ },
+ "locations": {
+ "identify": true,
+ "from_field": "filename",
+ "pattern": "cat-\\d\\d"
+ },
+ "field_mapping": [
+ {
+ "field": "prediction",
+ "map_type": "column",
+ "value": "predicted"
+ },
+ {
+ "field": "prediction_location",
+ "map_type": "column",
+ "value": "location"
+ },
+ {
+ "field": "value_date",
+ "map_type": "column",
+ "value": "date"
+ }
+ ],
+ "unit": {
+ "value": "ft^3/s"
+ },
+ "x_axis": "value_date"
+ }
+ ],
+ "crosswalks": [
+ {
+ "name": "Crosswalk",
+ "backend": {
+ "backend_type": "file",
+ "address": "resources/crosswalk.json",
+ "format": "json"
+ },
+ "observation_field_name": "observation_location",
+ "prediction_field_name": "prediction_location",
+ "field": {
+ "name": "prediction_location",
+ "where": "key",
+ "path": ["* where site_no"],
+ "origin": "$",
+ "datatype": "string",
+ "associated_fields": [
+ {
+ "name": "observation_location",
+ "path": "site_no",
+ "datatype": "string"
+ }
+ ]
+ }
+ }
+ ],
+ "thresholds": [
+ {
+ "name": "NWIS Stat Percentiles",
+ "backend": {
+ "name": "NWIS Stat Thresholds",
+ "backend_type": "file",
+ "format": "rdb",
+ "address": "resources/nwis_stat_thresholds.rdb"
+ },
+ "locations": {
+ "identify": true,
+ "from_field": "column",
+ "pattern": "site_no"
+ },
+ "application_rules": {
+ "name": "Date to Day",
+ "threshold_field": {
+ "name": "threshold_day",
+ "path": [
+ "month_nu",
+ "day_nu"
+ ],
+ "datatype": "Day"
+ },
+ "observation_field": {
+ "name": "threshold_day",
+ "path": [
+ "value_date"
+ ],
+ "datatype": "Day"
+ }
+ },
+ "definitions": [
+ {
+ "name": "75th Percentile",
+ "field": "p75_va",
+ "weight": 10,
+ "unit": {
+ "value": "ft^3/s"
+ }
+ },
+ {
+ "name": "80th Percentile",
+ "field": "p80_va",
+ "weight": 5,
+ "unit": {
+ "value": "ft^3/s"
+ }
+ },
+ {
+ "name": "Median",
+ "field": "p50_va",
+ "weight": 1,
+ "unit": {
+ "value": "ft^3/s"
+ }
+ }
+ ]
+ }
+ ],
+ "scheme": {
+ "name": "Prefer Pearson, then Nash and Kling, then POD and FAR",
+ "metrics": [
+ {
+ "name": "False Alarm Ratio",
+ "weight": 10
+ },
+ {
+ "name": "Probability of Detection",
+ "weight": 10
+ },
+ {
+ "name": "Kling-Gupta Efficiency",
+ "weight": 15
+ },
+ {
+ "name": "Normalized Nash-Sutcliffe Efficiency",
+ "weight": 15
+ },
+ {
+ "name": "Pearson Correlation Coefficient",
+ "weight": 18
+ }
+ ]
+ }
+}
+```
+
+A configuration using templates
+
+```json
+{
+ "observations": [
+ {
+ "template_name": "REST Observations",
+ "backend": {
+ "params": {
+ "sites": "0214657975,0214655255",
+ "startDT": "2022-12-01T00:00%2b0000",
+ "endDT": "2022-12-31T00:00%2b0000"
+ }
+ }
+ }
+ ],
+ "predictions": [
+ {
+ "template_name": "Predictions"
+ }
+ ],
+ "crosswalks": [
+ {
+ "template_name": "Templated Crosswalk"
+ }
+ ],
+ "thresholds": [
+ {
+ "template_name": "All Templates for NWIS Stat Percentiles"
+ }
+ ],
+ "scheme": {
+ "template_name": "Prefer Pearson, then Nash and Kling, then POD and FAR"
+ }
+}
+```
+
+A configuration using templates with overridden values
+
+```json
+{
+ "observations": [
+ {
+ "template_name": "Observations from Templates"
+ }
+ ],
+ "predictions": [
+ {
+ "template_name": "Predictions"
+ }
+ ],
+ "crosswalks": [
+ {
+ "template_name": "Templated Crosswalk"
+ }
+ ],
+ "thresholds": [
+ {
+ "template_name": "All Templates for NWIS Stat Percentiles"
+ }
+ ],
+ "scheme": {
+ "template_name": "Prefer Pearson, then Nash and Kling, then POD and FAR"
+ }
+}
+```
+
+
+## All Elements
+
+When put together, the entire object tree looks like:
+
+![All Specifications](../../../images/all-from-dmod.evaluations.specification.png)
\ No newline at end of file
diff --git a/python/lib/evaluations/dmod/evaluations/specification/__init__.py b/python/lib/evaluations/dmod/evaluations/specification/__init__.py
index 6ab28e36d..41ae5d429 100644
--- a/python/lib/evaluations/dmod/evaluations/specification/__init__.py
+++ b/python/lib/evaluations/dmod/evaluations/specification/__init__.py
@@ -22,13 +22,33 @@
import typing
+SPECIFICATION_TYPES = typing.Sequence[typing.Type[Specification]]
-def get_specification_options(*args, **kwargs) -> typing.Sequence[typing.Tuple[str, str]]:
+def get_specification_types(all_specifications: bool = False, *args, **kwargs) -> SPECIFICATION_TYPES:
from .base import get_subclasses
+ if all_specifications:
+ base_class = Specification
+ else:
+ base_class = TemplatedSpecification
+
+ return get_subclasses(base_class)
+
+
+def get_specification_options(all_specifications: bool = False, *args, **kwargs) -> typing.Sequence[typing.Tuple[str, str]]:
+ from .base import get_subclasses
+
+ if all_specifications:
+ base_class = Specification
+ else:
+ base_class = TemplatedSpecification
+
return [
- (cls.get_specification_type(), cls.get_specification_description())
- for cls in get_subclasses(TemplatedSpecification)
+ (
+ cls.get_specification_type(),
+ cls.get_specification_description()
+ )
+ for cls in get_subclasses(base_class)
]
diff --git a/python/lib/evaluations/dmod/evaluations/specification/scoring.py b/python/lib/evaluations/dmod/evaluations/specification/scoring.py
index b0738260d..fd509073d 100644
--- a/python/lib/evaluations/dmod/evaluations/specification/scoring.py
+++ b/python/lib/evaluations/dmod/evaluations/specification/scoring.py
@@ -20,6 +20,9 @@
class MetricSpecification(TemplatedSpecification):
+ """
+ The definition for what metric should be used and how important it should be
+ """
weight: typing.Union[float] = Field(description="A relative rating of the significance of this metric")
def __eq__(self, other: MetricSpecification) -> bool:
@@ -52,6 +55,9 @@ def apply_configuration(
class SchemeSpecification(TemplatedSpecification):
+ """
+ Instructions for how metrics should be applied to observations and forecasts along with how to interpret them
+ """
class Config:
fields = {
"metric_functions": {
diff --git a/python/lib/evaluations/dmod/evaluations/specification/threshold.py b/python/lib/evaluations/dmod/evaluations/specification/threshold.py
index 1da4a7595..c26861268 100644
--- a/python/lib/evaluations/dmod/evaluations/specification/threshold.py
+++ b/python/lib/evaluations/dmod/evaluations/specification/threshold.py
@@ -296,6 +296,9 @@ def __str__(self):
class ThresholdSpecification(LoaderSpecification):
+ """
+ Instructions for how to load and apply thresholds to observed and predicted data
+ """
definitions: typing.List[ThresholdDefinition] = Field(
description="The thresholds to apply to data"
)
diff --git a/python/lib/evaluations/dmod/evaluations/util.py b/python/lib/evaluations/dmod/evaluations/util.py
index 881979ced..d765461e1 100644
--- a/python/lib/evaluations/dmod/evaluations/util.py
+++ b/python/lib/evaluations/dmod/evaluations/util.py
@@ -603,13 +603,14 @@ def __init__(
day = possible_args[0]
elif len(possible_args) == 2:
# We are going to interpret this as month-day
+ # The year doesn't matter since we are focused on a day of the year separate from the year itself
day = pandas.Timestamp(year=2020, month=possible_args[0], day=possible_args[1])
elif len(possible_args) > 3:
# We're going to interpret this as year-month-day. Further args may include time, but those are not
# important for this
day = pandas.Timestamp(year=possible_args[0], month=possible_args[1], day=possible_args[2])
else:
- raise ValueError("A list of no numbers was passed; a Day cannot be interpretted.")
+ raise ValueError("A list of no numbers was passed; a Day cannot be interpreted.")
if isinstance(day, str) and value_is_number(day):
day = float(day)
diff --git a/python/lib/evaluations/images/all-from-dmod.evaluations.specification.png b/python/lib/evaluations/images/all-from-dmod.evaluations.specification.png
new file mode 100644
index 000000000..68a1227b2
Binary files /dev/null and b/python/lib/evaluations/images/all-from-dmod.evaluations.specification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.backend.BackendSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.backend.BackendSpecification.png
new file mode 100644
index 000000000..b3194a68c
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.backend.BackendSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.data.DataSourceSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.data.DataSourceSpecification.png
new file mode 100644
index 000000000..50615d92c
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.data.DataSourceSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png
new file mode 100644
index 000000000..4d2b20bf6
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.evaluation.EvaluationSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.fields.AssociatedField.png b/python/lib/evaluations/images/dmod.evaluations.specification.fields.AssociatedField.png
new file mode 100644
index 000000000..2e7f88541
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.fields.AssociatedField.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.fields.FieldMappingSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.fields.FieldMappingSpecification.png
new file mode 100644
index 000000000..bf8d34b7c
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.fields.FieldMappingSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.fields.ValueSelector.png b/python/lib/evaluations/images/dmod.evaluations.specification.fields.ValueSelector.png
new file mode 100644
index 000000000..6122c3ed0
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.fields.ValueSelector.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.locations.CrosswalkSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.locations.CrosswalkSpecification.png
new file mode 100644
index 000000000..09534e6bf
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.locations.CrosswalkSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.locations.LocationSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.locations.LocationSpecification.png
new file mode 100644
index 000000000..d31a586e7
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.locations.LocationSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.scoring.MetricSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.MetricSpecification.png
new file mode 100644
index 000000000..ee2145bee
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.MetricSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.scoring.SchemeSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.SchemeSpecification.png
new file mode 100644
index 000000000..b217f10e4
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.scoring.SchemeSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png
new file mode 100644
index 000000000..7dab285f2
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdApplicationRules.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdDefinition.png b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdDefinition.png
new file mode 100644
index 000000000..4291704e5
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdDefinition.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdSpecification.png b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdSpecification.png
new file mode 100644
index 000000000..a534878a9
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.threshold.ThresholdSpecification.png differ
diff --git a/python/lib/evaluations/images/dmod.evaluations.specification.unit.UnitDefinition.png b/python/lib/evaluations/images/dmod.evaluations.specification.unit.UnitDefinition.png
new file mode 100644
index 000000000..1240d9bd8
Binary files /dev/null and b/python/lib/evaluations/images/dmod.evaluations.specification.unit.UnitDefinition.png differ
diff --git a/python/lib/evaluations/update_diagrams.py b/python/lib/evaluations/update_diagrams.py
new file mode 100644
index 000000000..c3f68e820
--- /dev/null
+++ b/python/lib/evaluations/update_diagrams.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""
+Regenerates the class diagrams for dmod.evaluations.specification and writes them into the images directory
+"""
+import os
+import sys
+import typing
+
+from pathlib import Path
+
+import dmod.evaluations.specification as specification
+
+from argparse import ArgumentParser
+
+
+try:
+ import erdantic
+except ImportError:
+ print(
+ "Erdantic is required in order to produce diagrams, but was not found. "
+ "Please install a compatible version if updated diagrams are needed.",
+ file=sys.stderr
+ )
+ exit(255)
+
+
+DIAGRAM_DIRECTORY = "./images"
+
+
+class Arguments(object):
+ def __init__(self, *args):
+ # Replace '__option' with any of the expected arguments
+ self.__option: typing.Optional[str] = None
+
+ self.__parse_command_line(*args)
+
+ # Add a property for each argument
+ @property
+ def option(self) -> str:
+ return self.__option
+
+ def __parse_command_line(self, *args):
+        parser = ArgumentParser("Update the class diagrams for dmod.evaluations.specification")
+
+ # Add Arguments
+ parser.add_argument(
+ "-o",
+ metavar="option",
+ dest="option",
+ type=str,
+ default="default",
+ help="This is an example of an option"
+ )
+
+ # Parse the list of args if one is passed instead of args passed to the script
+ if args:
+ parameters = parser.parse_args(args)
+ else:
+ parameters = parser.parse_args()
+
+ # Assign parsed parameters to member variables
+ self.__option = parameters.option
+
+
+def main():
+ """
+    Generate a diagram for each specification class along with a combined diagram covering all of them
+ """
+ arguments = Arguments()
+ classes_to_diagram: typing.Sequence[typing.Type[specification.Specification]] = specification.get_specification_types(all_specifications=True)
+ failures: typing.List[str] = []
+
+ class_to_diagram: typing.Optional[typing.Type[specification.Specification]] = None
+
+ for class_to_diagram in classes_to_diagram:
+ full_name = f"{class_to_diagram.__module__}.{class_to_diagram.__qualname__}"
+ output_path = os.path.join(DIAGRAM_DIRECTORY, f"{full_name}.png")
+
+ try:
+ erdantic.draw(class_to_diagram, out=output_path, depth_limit=0)
+ except BaseException as exception:
+ message = f"Failed to draw a graph at: '{full_name}'{os.linesep} {exception}"
+ failures.append(message)
+ else:
+ real_path = Path(output_path)
+ print(f"Wrote a diagram for '{full_name}' to '{real_path.resolve()}'")
+
+    if classes_to_diagram:
+        diagram_for_all_path = "all-from-dmod.evaluations.specification"
+ output_path = os.path.join(DIAGRAM_DIRECTORY, f"{diagram_for_all_path}.png")
+
+ try:
+ erdantic.draw(*classes_to_diagram, out=output_path, depth_limit=9999, orientation=erdantic.Orientation.VERTICAL)
+ except BaseException as exception:
+ message = f"Failed to draw a graph at: '{diagram_for_all_path}'{os.linesep} {exception}"
+ failures.append(message)
+ else:
+ real_path = Path(output_path)
+ print(f"Wrote a diagram for '{diagram_for_all_path}' to '{real_path.resolve()}'")
+
+ for message in failures:
+ print(message, file=sys.stderr)
+
+ exit(len(failures))
+
+if __name__ == "__main__":
+ main()
diff --git a/python/lib/metrics/README.md b/python/lib/metrics/README.md
index 96dd36434..c7f06d38d 100644
--- a/python/lib/metrics/README.md
+++ b/python/lib/metrics/README.md
@@ -1,5 +1,293 @@
-# About
-Python package for utilities related to forcing, meta, and other modeling-related data for NWM MaaS.
+# Distributed Model on Demand - Metrics
-# Structure
-Structure has been modified from original to have inner duplicate directory in order to comply with general Python packaging structure. This facilitates executing tests in a variety of different scenarios, including running integration tests on a local machine using the included test script and/or running unit tests directly within an IDE.
+The Distributed Model on Demand (DMOD) metrics package is a Python library dedicated exclusively
+to describing the available metrics and performing mathematical operations upon provided data.
+
+## How are metrics called?
+
+A class named `ScoringScheme` within `dmod.metrics.scoring` manages the operations.
+You first provide it a list of all metrics you intend to gather from your input data and a set of
+`Communicator` objects to help distribute information generated during evaluation. Next, you call the
+`ScoringScheme` (via its `score` method, as shown below) with prepared pairs, where to find the "observed" values
+within the pairs, where to find the "predicted" values within the pairs, and any thresholds. This invocation
+will yield a `MetricResults` object.
+
+```python
+import pandas
+import dmod.metrics as metrics
+
+pairs = pandas.read_csv("path/to/pairs.csv")
+
+scheme = metrics.ScoringScheme([
+ metrics.KlingGuptaEfficiency(3),
+ metrics.PearsonCorrelationCoefficient(6),
+])
+
+results = scheme.score(pairs=pairs, observed_value_label="observation", predicted_value_label="prediction")
+```
+
+This same `ScoringScheme` may be called many times over with different sets of data, usually
+corresponding to different locations, while maintaining a common standard of expected results.
+
+```python
+results2 = scheme.score(pairs=pandas.read_csv("path/to/pairs2.csv"), observed_value_label="observation", predicted_value_label="prediction")
+results3 = scheme.score(pairs=pandas.read_csv("path/to/pairs3.csv"), observed_value_label="observation", predicted_value_label="prediction")
+results4 = scheme.score(pairs=pandas.read_csv("path/to/pairs4.csv"), observed_value_label="observation", predicted_value_label="prediction")
+```
+
+## What do `MetricResults` provide?
+
+`dmod.metrics.MetricResults` objects provide access to individual metric results along with tools for interpreting
+them in different ways and for serializing results for further communication.
+
+## How can `MetricResults` interpret the outcome of an evaluation?
+
+Each `MetricResults` object contains the evaluated metrics performed on a singular set of data.
+When providing the `Metric`s that are run to the `ScoringScheme`, a weight is passed along for
+each metric. Passing thresholds to the invocation will provide weights for each threshold. This
+provides a basis to establish a hierarchy of importance for each metric and threshold. For
+example, the `Critical Success Index` may be deemed as twice as important as the results for
+the `Normalized Nash-Sutcliffe Efficiency` and results for the "Major" threshold may be deemed
+1.5 times more important than the results for the "Moderate" threshold.
+
+These weights, along with metadata for each metric, provide a means of scaling and grading. The
+`PearsonCorrelationCoefficient` `Metric` class, for example, stores the maximum value as `1`,
+the minimum value as `-1`, the ideal value as `1`, and `0` marks a total failure of the predicted
+data to correlate in some fashion to the observed data (negative values aren't considered a total
+failure since they indicate some degree of negative correlation whereas `0` is absolutely none
+whatsoever). For the following example, let us say that the result of a given instance of
+`PearsonCorrelationCoefficent` with a weight of `7` has a result of `0.6734` for the "Major"
+threshold. This is interpreted as a scaled value of `4.7138`. Now let us say we have an
+instance of `ProbabilityOfFalseDetection` with a weight of `3` that has a result of `0.12`.
+This is interpreted as having a scaled value of `2.64`. If those are the only metrics considered
+for the "Major" threshold, the maximum possible value for that threshold is `10` (
+`PearsonCorrelationCoefficient` with a weight of `7` and `ProbabilityOfFalseDetection` with
+a weight of `3`). Since those results were `4.7138` and `2.64`, their total value was `7.3538`
+out of `10`, or a grade of `73.538%`. Now say the only other threshold being evaluated was
+"Moderate" with a weight of `3` and a total of `2.73`, or a grade of `91%`. The overall result
+of the combination of these two thresholds is now `7.3538` + `2.73` out of `10` + `3`, or
+`10.0838` out of `13`, with a grade of `77.568%`.
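+
+As a rough sketch of that arithmetic (purely illustrative; the actual calculations are carried out inside
+`dmod.metrics.scoring`):
+
+```python
+# "Major" threshold
+pearson_scaled = 0.6734 * 7                 # 4.7138 out of a possible 7
+pofd_scaled = (1 - 0.12) * 3                # 2.64 out of a possible 3 (a lower POFD is better,
+                                            # so its distance from the worst value is scaled)
+major_total = pearson_scaled + pofd_scaled  # 7.3538
+major_maximum = 7 + 3                       # 10
+major_grade = major_total / major_maximum   # ~0.735, i.e. a grade of 73.538%
+
+# "Moderate" threshold
+moderate_total = 2.73
+moderate_maximum = 3
+moderate_grade = moderate_total / moderate_maximum  # 0.91, i.e. a grade of 91%
+
+# Overall result across both thresholds
+overall_grade = (major_total + moderate_total) / (major_maximum + moderate_maximum)
+print(f"{overall_grade:.3%}")               # ~77.568%
+```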
+
+## What is a `Metric` in the codebase?
+
+An instance of `dmod.metrics.scoring.Metric` is an object that may be called to provide scores
+for a collection of pairs. Examples of these
+`dmod.metrics.scoring.Metric` classes are `dmod.metrics.ProbabilityOfFalseDetection`
+and `dmod.metrics.PearsonCorrelationCoefficient`. These are constructed with a given weight, so
+an instance of `dmod.metrics.ProbabilityOfDetection` may be created with a weight of `3` and
+an instance of `dmod.metrics.KlingGuptaEfficiency` may be created with a weight of `8`.
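+
+For instance, constructing those two weighted metrics looks like this:
+
+```python
+import dmod.metrics as metrics
+
+# The single constructor argument is the metric's relative weight
+probability_of_detection = metrics.ProbabilityOfDetection(3)
+kling_gupta_efficiency = metrics.KlingGuptaEfficiency(8)
+```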
+
+New metrics may be implemented simply by declaring the new class and implementing `get_descriptions`,
+which allows outside code to discover what the new metric is expected to do, `get_name`, which
+provides an easily identifiable name, and `__call__`, which is where the actual operation is
+performed.
+
+A very simple example would be:
+
+```python
+import typing
+
+import pandas
+import sklearn.metrics
+
+from dmod.metrics import scoring
+from dmod.metrics.threshold import Threshold  # module path may differ slightly by DMOD version
+
+# Stands in for the NUMBER alias used within dmod.metrics for values that may be ints or floats
+NUMBER = typing.Union[int, float]
+
+
+class VolumeError(scoring.Metric):
+    @classmethod
+    def get_descriptions(cls):
+        return "The difference between the observed volume and the predicted volume"
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "Volume Error"
+
+    def __init__(self, weight: NUMBER):
+        """
+        Constructor
+
+        Args:
+            weight: The relative significance of the metric
+        """
+        super().__init__(
+            weight=weight,
+            ideal_value=0,
+            greater_is_better=False
+        )
+
+    def __call__(
+        self,
+        pairs: pandas.DataFrame,
+        observed_value_label: str,
+        predicted_value_label: str,
+        thresholds: typing.Sequence[Threshold] = None,
+        *args,
+        **kwargs
+    ) -> scoring.Scores:
+        scores: typing.List[scoring.Score] = list()
+
+        for volume_threshold in thresholds or []:  # guard against the default of None
+            # Restrict the pairs to the values that this threshold deems relevant
+            filtered_pairs = volume_threshold(pairs)
+            difference = 0
+            if not filtered_pairs.empty:
+                # Integrate each series over time and compare the resulting volumes
+                dates: typing.List[int] = [value.astype("int") for value in filtered_pairs.index.values]
+                area_under_observations = sklearn.metrics.auc(dates, filtered_pairs[observed_value_label])
+                area_under_predictions = sklearn.metrics.auc(dates, filtered_pairs[predicted_value_label])
+                difference = area_under_predictions - area_under_observations
+            scores.append(scoring.Score(self, difference, volume_threshold, sample_size=len(filtered_pairs)))
+
+        return scoring.Scores(self, scores)
+```
+
+This provides everything needed to use outside logic from `scikit-learn` _and_ keep the metric
+easy to discover and explore.
+
+## What about categorical metrics?
+
+`dmod.metrics` has broad support for categorical metrics, primarily due to their bounded nature.
+This means that they naturally have well-defined information for scaling and grading.
+
+Implementing categorical metrics is relatively straightforward since they rely on truth tables. Probability of
+Detection, for example, requires very little code:
+
+```python
+class ProbabilityOfDetection(CategoricalMetric):
+    @classmethod
+    def get_descriptions(cls):
+        return "The probability that something was detected. Sensitive to hits, but ignores false alarms. " \
+               "Very sensitive to the climatological frequency of the event. Good for rare events."
+
+    def _get_values(self, tables: categorical.TruthTables) -> typing.Iterable[KEY_AND_ROW]:
+        return tables.probability_of_detection.iterrows()
+
+    @classmethod
+    def get_metadata(cls) -> categorical.CategoricalMetricMetadata:
+        return categorical.TruthTable.get_metric_metadata("probability_of_detection")
+```
+
+All this ends up doing is using the `probability_of_detection` property that has already been defined
+on the `TruthTables` collection, which holds the individual `TruthTable` objects:
+
+```python
+    @property
+    def probability_of_detection(self) -> pandas.DataFrame:
+        """
+        A frame depicting the probability of detection for each truth table
+        """
+        probabilities_of_detection = [
+            {
+                "series_weight": self.__weight,
+                "threshold": table.name,
+                "threshold_weight": table.weight,
+                "value": table.probability_of_detection(),
+                "sample_size": len(table)
+            }
+            for table in self.__tables.values()
+        ]
+
+        return pandas.DataFrame(probabilities_of_detection)
+```
+
+## What is meant by "Discoverability"?
+
+Metadata may be collected about each implemented `Metric` without any outside code or
+hardcoding. Anything that can import `dmod.metrics` can perform operations such as:
+
+```python
+from pprint import pprint
+import dmod.metrics as metrics
+
+pprint(metrics.get_metric_options())
+```
+
+and see:
+
+```shell
+[{'description': 'The probability that something was detected. Sensitive to '
+                 'hits, but ignores false alarms. Very sensitive to the '
+                 'climatological frequency of the event. Good for rare events.',
+  'identifier': 'probabilityofdetection',
+  'name': 'Probability Of Detection'},
+ {'description': 'The probability that something was falsely reported as '
+                 'happening. Sensitive to false alarms, but ignores misses. '
+                 'Very sensitive to the climatological frequency of the event. '
+                 'Should be used in conjunction with the probability of '
+                 'detection.',
+  'identifier': 'falsealarmratio',
+  'name': 'False Alarm Ratio'},
+ {'description': 'Sensitive to false alarms, but ignores misses. Can be '
+                 "artificially improved by issuing fewer 'yes' forecasts to "
+                 'reduce the number of false alarms. Not often reported for '
+                 'deterministic forecasts, but is an important component of '
+                 'the Relative Operating Characteristic (ROC) used widely for '
+                 'probabilistic forecasts.',
+  'identifier': 'probabilityoffalsedetection',
+  'name': 'Probability Of False Detection'},
+ ...
+```
+
+This provides more than enough information to build user interfaces or to add further context
+to received data. Say I receive a value for `"equitablethreatscore"`. What does
+that value indicate? Well, the metadata explains that `"equitablethreatscore"` may be
+displayed as `"Equitable Threat Score"` and describes `'How well did the forecast "yes" events correspond
+to the observed "yes" events (accounting for hits due to chance)? Sensitive to hits. Because it
+penalises both misses and false alarms in the same way, it does not distinguish the source of
+forecast error.'`
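+
+For example, a quick way to look up that metadata for an identifier received from elsewhere might look like
+the sketch below (assuming only what the listing above shows: each entry exposes `identifier`, `name`, and
+`description`):
+
+```python
+import dmod.metrics as metrics
+
+# Find the metadata entry whose identifier matches the received value
+equitable_threat_score = next(
+    (
+        entry
+        for entry in metrics.get_metric_options()
+        if entry["identifier"] == "equitablethreatscore"
+    ),
+    None
+)
+
+if equitable_threat_score is not None:
+    print(equitable_threat_score["name"])
+    print(equitable_threat_score["description"])
+```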
+
+## What is a `Communicator`?
+
+A `dmod.metrics.Communicator` is an event-based mechanism for handling data emission.
+`Communicator`s are stored within `dmod.metrics.CommunicatorGroup`s, which can handle wider-scale
+communication operations. An example can be found in the `score` function of
+`dmod.metrics.ScoringScheme`, where each evaluation of a metric is announced:
+
+```python
+        for metric in self.__metrics:  # type: Metric
+            self.__communicators.info(f"Calling {metric.name}", verbosity=Verbosity.LOUD, publish=True)
+            ...
+```
+
+This means that the `info` event will be triggered on each held `Communicator` that is set to handle
+messages with a verbosity of `LOUD` or greater and, since `publish=True`, that the `write` event will be
+called afterwards.
+
+Say I have three communicators:
+
+1. Writes errors to stderr with a verbosity of `LOUD` (operates when very little data is necessary)
+2. Writes information with a verbosity of `QUIET` to a file
+3. Sends `LOUD` messages through Redis channels
+(see `dmod.evaluation_service.utilities.communication.RedisCommunicator`)
+
+Per the above example, `Communicator` 1 won't perform any operations because it only handles errors and this
+was just standard information. `Communicator` 2 won't perform any operations because the given message was meant
+to be loud and `Communicator` 2 is only meant to handle `QUIET` data. `Communicator` 3, though, will handle the
+message by transforming the information into a common format, adding it to a list in a specified Redis instance,
+and calling the `write` command on it. The `write` command will send the message to all clients listening to that
+Redis channel and pass the transformed data to any added handlers for the `write` event.
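+
+The dispatch behavior described above can be illustrated with a small, self-contained sketch. None of the
+classes below are dmod's actual implementations (`Communicator` construction and the real `Verbosity` enum are
+defined within DMOD); this only demonstrates the filtering rule:
+
+```python
+from dataclasses import dataclass
+from enum import IntEnum
+
+
+class Verbosity(IntEnum):
+    """Illustrative verbosity levels; dmod defines its own Verbosity enum"""
+    QUIET = 1
+    NORMAL = 2
+    LOUD = 3
+
+
+@dataclass
+class IllustrativeCommunicator:
+    """A stand-in used only to show how an `info` message is filtered"""
+    name: str
+    verbosity: Verbosity
+    errors_only: bool = False
+
+    def info(self, message: str, verbosity: Verbosity, publish: bool = False) -> None:
+        # Error-only communicators ignore informational messages, and a communicator whose
+        # configured verbosity is below the message's verbosity ignores the message as well
+        if self.errors_only or self.verbosity < verbosity:
+            return
+        print(f"[{self.name}] {message}")
+        if publish:
+            self.write(message)
+
+    def write(self, message: str) -> None:
+        # Stand-in for forwarding the message to listeners (a file, a Redis channel, etc.)
+        print(f"[{self.name}] published: {message}")
+
+
+# Mirrors the three communicators described above
+communicators = [
+    IllustrativeCommunicator("stderr", Verbosity.LOUD, errors_only=True),  # 1
+    IllustrativeCommunicator("file", Verbosity.QUIET),                     # 2
+    IllustrativeCommunicator("redis", Verbosity.LOUD),                     # 3
+]
+
+# Only communicator 3 handles this LOUD informational message and publishes it
+for communicator in communicators:
+    communicator.info("Calling Pearson Correlation Coefficient", Verbosity.LOUD, publish=True)
+```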
+
+## How can I invoke a metric?
+
+There's not a lot of complexity when it comes to just calling metrics:
+
+```python
+from pprint import pprint
+
+import pandas
+
+import dmod.metrics as metrics
+
+pearson = metrics.PearsonCorrelationCoefficient(5)
+observation_key = "observed"
+model_key = "modeled"
+data = pandas.DataFrame({observation_key: [1, 2, 3, 4, 5], model_key: [2, 3, 4, 5, 6]})
+results = pearson(data, observation_key, model_key)
+pprint(results.to_dict())
+```
+
+which yields:
+
+```shell
+{'grade': '100.00%',
+ 'scaled_value': 4.99,
+ 'scores': {'All': {'failed': False,
+                    'grade': 99.999,
+                    'sample_size': 5,
+                    'scaled_value': 0.99,
+                    'value': 0.99,
+                    'weight': 1}},
+ 'total': 0.9999999999999999}
+```
+
+`dmod.evaluations` provides functionality that helps with more advanced operations
+(such as adding thresholds and operating upon many metrics).
\ No newline at end of file
diff --git a/python/lib/metrics/dmod/metrics/metric.py b/python/lib/metrics/dmod/metrics/metric.py
index a30643691..30a229492 100644
--- a/python/lib/metrics/dmod/metrics/metric.py
+++ b/python/lib/metrics/dmod/metrics/metric.py
@@ -6,8 +6,8 @@
* Forecast Verification - Issues, Methods and FAQ, Ebert,
https://www.cawcr.gov.au/projects/verification/verif_web_page.html
"""
+from __future__ import annotations
-import os
import typing
import abc
import math
@@ -42,7 +42,7 @@
def is_type(value: object, value_type: typing.Type) -> bool:
"""
- Determines whether or not the given value matches the given type
+ Determines whether the given value matches the given type
This can be used to evaluated types such as unions since you cannot use `isinstance`
@@ -51,7 +51,7 @@ def is_type(value: object, value_type: typing.Type) -> bool:
value_type: The type to check against
Returns:
- Whether or not the value matches the given type
+ Whether the value matches the given type
"""
value_is_valid = False
@@ -184,7 +184,7 @@ def find_truthtables_key(**kwargs) -> typing.Optional[str]:
Returns:
"TRUTH_TABLES" if there's a TruthTables in the kwargs, otherwise the first TruthTables present if it exists
"""
- # Find all TruthTables in the passed kwargs
+ # Find all TruthTables in the provided kwargs
keys = [
key
for key, value in kwargs.items()
diff --git a/python/lib/metrics/dmod/metrics/scoring.py b/python/lib/metrics/dmod/metrics/scoring.py
index 63aa083b7..53110114d 100644
--- a/python/lib/metrics/dmod/metrics/scoring.py
+++ b/python/lib/metrics/dmod/metrics/scoring.py
@@ -470,7 +470,7 @@ def __str__(self):
return f"{self.name}: {self.value} out of {self.maximum_value}"
-class Scores(abstract_collections.Sized, abstract_collections.Iterable):
+class Scores(abstract_collections.Sequence):
def __len__(self) -> int:
return len(self.__results)
@@ -592,7 +592,7 @@ def __repr__(self) -> str:
return self.__str__()
-class MetricResults(object):
+class MetricResults:
"""
A mapping thresholds to a variety of metrics and their values