Merge pull request #21 from jhoshiko/feature/update-visualizations

Add Model Visualization Methods
NREL · May 30, 2024 · 76c22c4 · 76c22c4
2 parents ecd3673 + 64125a8
commit 76c22c4
Show file tree

Hide file tree

Showing 2 changed files with 161 additions and 42 deletions.
diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py
@@ -2,8 +2,9 @@
 
 from dataclasses import dataclass
 import json
+from math import isinf
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, TYPE_CHECKING, Union
 from urllib import request
 
 import pandas as pd
@@ -13,13 +14,22 @@
     feature_names_to_id,
 )
 
+from nrel.routee.powertrain.core.features import feature_id_to_names
 from nrel.routee.powertrain.core.metadata import Metadata
 from nrel.routee.powertrain.core.real_world_adjustments import ADJUSTMENT_FACTORS
 from nrel.routee.powertrain.estimators.estimator_interface import Estimator
 from nrel.routee.powertrain.estimators.onnx import ONNXEstimator
 from nrel.routee.powertrain.estimators.smart_core import SmartCoreEstimator
+
+from nrel.routee.powertrain.validation.feature_visualization import (
+    contour_plot,
+    visualize_features,
+)
 from nrel.routee.powertrain.validation.errors import ModelErrors
 
+if TYPE_CHECKING:
+    from pandas import Series
+
 REGISTERED_ESTIMATORS = {
     "ONNXEstimator": ONNXEstimator,
     "SmartCoreEstimator": SmartCoreEstimator,
@@ -175,6 +185,103 @@ def to_file(self, file: Union[str, Path]):
         with path.open("w") as f:
             json.dump(output_dict, f)
 
+    def visualize_features(
+        self,
+        estimator_id: FeatureSetId,
+        n_samples: Optional[int] = 100,
+        output_path: Optional[str] = None,
+        return_predictions: Optional[bool] = False,
+    ) -> Optional[Dict[str, "Series"]]:
+        """
+        generates test links to independently test the model's features
+        and creates plots of those predictions for the given estimator id
+
+        Args:
+            estimator_id: the estimator id for generating the plots
+            n_samples: the number of samples used to generate the plots
+            output_path: an optional path to save the plots as png files.
+            return_predictions: if true, returns the dictionary containing the prediction values
+
+        Returns: a dictionary of predictions keyed by the feature tested if return_predictions is true, otherwise None
+        """
+        feature_set = self.metadata.config.get_feature_set(
+            feature_id_to_names(estimator_id)
+        )
+        if feature_set is None:
+            raise KeyError(
+                f"Model does not have a feature set with the features: {feature_id_to_names(estimator_id)}"
+            )
+        feature_ranges = {}
+        for f in feature_set.features:
+            if isinf(f.constraints.upper) or isinf(f.constraints.lower):
+                raise ValueError(
+                    f"Feature: {f.name} has constraints with positive/negative infinity in the lower/upper bound. "
+                    f"You can add constraints when training a model or set custom constraints during visualization using "
+                    f"nrel.routee.powertrain.validation.feature_visualization.visualize_features"
+                )
+            feature_ranges[f.name] = {
+                "upper": f.constraints.upper,
+                "lower": f.constraints.lower,
+                "n_samples": n_samples,
+            }
+
+        return visualize_features(
+            model=self,
+            feature_ranges=feature_ranges,
+            output_path=output_path,
+            return_predictions=return_predictions,
+        )
+
+    def contour(
+        self,
+        estimator_id: FeatureSetId,
+        x_feature: str,
+        y_feature: str,
+        n_samples: Optional[int] = 100,
+        output_path: Optional[str] = None,
+    ):
+        """
+        generates a contour plot of the two test features, x_feature and y_feature,
+        for the given estimator id
+
+        Args:
+            estimator_id: the estimator id for generating the plots
+            x_feature: one of the features used to generate the energy matrix
+                and will be the x-axis feature
+            y_feature: one of the features used to generate the energy matrix
+                and will be the y-axis feature
+            n_samples: the number of samples used to generate the plots
+            output_path: an optional path to save the plots as png files.
+        """
+        feature_set = self.metadata.config.get_feature_set(
+            feature_id_to_names(estimator_id)
+        )
+        if feature_set is None:
+            raise KeyError(
+                f"Model does not have a feature set with the features: {feature_id_to_names(estimator_id)}"
+            )
+        feature_ranges = {}
+        for f in feature_set.features:
+            if isinf(f.constraints.upper) or isinf(f.constraints.lower):
+                raise ValueError(
+                    f"Feature: {f.name} has constraints with positive/negative infinity in the lower/upper bound. "
+                    f"You can add constraints when training a model or set custom constraints during visualization using "
+                    f"nrel.routee.powertrain.validation.feature_visualization.contour_plot"
+                )
+            feature_ranges[f.name] = {
+                "upper": f.constraints.upper,
+                "lower": f.constraints.lower,
+                "n_samples": n_samples,
+            }
+
+        return contour_plot(
+            model=self,
+            x_feature=x_feature,
+            y_feature=y_feature,
+            feature_ranges=feature_ranges,
+            output_path=output_path,
+        )
+
     def predict(
         self,
         links_df: pd.DataFrame,

diff --git a/nrel/routee/powertrain/validation/feature_visualization.py b/nrel/routee/powertrain/validation/feature_visualization.py
@@ -1,34 +1,38 @@
 import logging
 import traceback
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, TYPE_CHECKING, Union
 
 import numpy as np
 from pandas import DataFrame
 
-from nrel.routee.powertrain.core.model import Model
+if TYPE_CHECKING:
+    from nrel.routee.powertrain.core.model import Model
+    from pandas import Series
 
 log = logging.getLogger(__name__)
 
 
 def visualize_features(
-    model: Model,
+    model: "Model",
     feature_ranges: Dict[str, dict],
-    output_path: Optional[str] = None,
-) -> dict:
+    output_path: Optional[Union[str, Path]] = None,
+    return_predictions: Optional[bool] = False,
+) -> Optional[Dict[str, "Series"]]:
     """
     takes a model and generates test links to independently test the model's features
     and creates plots of those predictions
 
-    :param model: the model to be tested
-    :param feature_ranges: a dictionary with value ranges to generate test links
-    :param output_path: if not none, saves results to this location. Else the plots
-        are displayed rather than saved
+    Args:
+        model: the model that will be used to generate the plots
+        feature_ranges: a nested dictionary where each key should be a feature name and
+            each value should be another dictionary containing "lower", "upper", and "n_samples" keys/values.
+            These correspond to the lower/upper boundaries and n samples used to generate the plot.
+            n_samples must be an integer and lower/upper are floats.
+        output_path: an optional path to save the plots as png files.
+        return_predictions: if true, returns the dictionary containing the prediction values
 
-    :return: a dictionary containing the predictions where the key is the feature tested
-
-    :raises Exception due to IOErrors, KeyError due to missing features ranges required
-        by the model
+    Returns: a dictionary of predictions keyed by the feature tested if return_predictions is true, otherwise None
     """
     try:
         import matplotlib.pyplot as plt
@@ -83,9 +87,9 @@ def visualize_features(
         sample_points = []
         for feature_name in feature_units_dict.keys():
             points = np.linspace(
-                feature_ranges[feature_name]["min"],
-                feature_ranges[feature_name]["max"],
-                feature_ranges[feature_name]["steps"],
+                feature_ranges[feature_name]["lower"],
+                feature_ranges[feature_name]["upper"],
+                feature_ranges[feature_name]["n_samples"],
             )
             sample_points.append(points)
 
@@ -124,11 +128,11 @@ def visualize_features(
         # if an output filepath is specified, save th results instead of displaying them
         if output_path is not None:
             try:
-                Path(output_path).joinpath(f"{model_name}").mkdir(
-                    parents=True, exist_ok=True
-                )
+                if isinstance(output_path, str):
+                    output_path = Path(output_path)
+                output_path.joinpath(f"{model_name}").mkdir(parents=True, exist_ok=True)
                 plt.savefig(
-                    Path(output_path).joinpath(f"{model_name}/{current_feature}.png"),
+                    output_path.joinpath(f"{model_name}/{current_feature}.png"),
                     format="png",
                 )
             except Exception:
@@ -145,31 +149,34 @@ def visualize_features(
         plt.clf()
         predictions[current_feature] = prediction
 
-    return predictions
+    if return_predictions:
+        return predictions
+    else:
+        return None
 
 
 def contour_plot(
-    model: Model,
+    model: "Model",
     x_feature: str,
     y_feature: str,
     feature_ranges: Dict[str, Dict],
-    output_path: Optional[str] = None,
+    output_path: Optional[Union[str, Path]] = None,
 ):
     """
     takes a model and generates a contour plot of the two test features:
-    x_Feature and y_feature.
-
-    :param model: the model to be tested
-    :param x_feature: one of the features used to generate the energy matrix
-        and will be the x-axis feature
-    :param y_feature: one of the features used to generate the energy matrix
-        and will be the y-axis feature
-    :param feature_ranges: a dictionary with value ranges to generate test links
-    :param output_path: if not none, saves results to this location.
-        Else the plot is displayed rather than saved
-
-    :raises Exception due to IOErrors, KeyError due to missing features ranges required
-    by the model, KeyError due to incompatible x/y features
+    x_feature and y_feature.
+
+    Args:
+        model: the model that will be used to generate the plots
+        x_feature: one of the features used to generate the energy matrix
+            and will be the x-axis feature
+        y_feature: one of the features used to generate the energy matrix
+            and will be the y-axis feature
+        feature_ranges: a nested dictionary where each key should be a feature name and
+            each value should be another dictionary containing "lower", "upper", and "n_samples" keys/values.
+            These correspond to the lower/upper boundaries and n samples used to generate the plot.
+            n_samples must be an integer and lower/upper are floats.
+        output_path: an optional path to save the plot as a png file.
     """
     try:
         import matplotlib.pyplot as plt
@@ -219,9 +226,9 @@ def contour_plot(
 
     points = {
         n: np.linspace(
-            f["min"],
-            f["max"],
-            f["steps"],
+            f["lower"],
+            f["upper"],
+            f["n_samples"],
         )
         for n, f in feature_ranges.items()
     }
@@ -252,9 +259,12 @@ def contour_plot(
     # if an output filepath is specified, save th results instead of displaying them
     if output_path is not None:
         try:
+            if isinstance(output_path, str):
+                output_path = Path(output_path)
+            output_path.joinpath(f"{model_name}").mkdir(parents=True, exist_ok=True)
             plt.savefig(
-                Path(output_path).joinpath(
-                    f"{model_name}_[{x_feature}_{y_feature}].png"
+                output_path.joinpath(
+                    f"{model_name}/{model_name}_[{x_feature}_{y_feature}].png"
                 ),
                 format="png",
             )
@@ -265,3 +275,5 @@ def contour_plot(
         plt.show()
 
     plt.close()
+
+    return None