Merge branch 'changes-to-data-indices-in-anemoi-models' of github.com…

…:ecmwf/anemoi-training into changes-to-data-indices-in-anemoi-models
ecmwf · Aug 28, 2024 · 937160f · 937160f
2 parents fe5200f + 5d845b5
commit 937160f
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 9 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,6 +16,11 @@ Keep it human-readable, your future self will thank you!
 
 - Introduction of remapper to anemoi-models leads to changes in the data indices and some preprocessors cannot be applied in-place anymore.
 
+#### Functionality
+
+- Enable the callback for plotting a histogram for variables containing NaNs
+- Enforce same binning for histograms comparing true data to predicted data
+
 ## [0.1.0 - Anemoi training - First release](https://github.com/ecmwf/anemoi-training/compare/x.x.x...0.1.0) - 2024-08-16
 
 ### Added

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# anemoi-utils
+# anemoi-training
 
 **DISCLAIMER**
 This project is **BETA** and will be **Experimental** for the foreseeable future.

diff --git a/src/anemoi/training/diagnostics/plots.py b/src/anemoi/training/diagnostics/plots.py
@@ -197,7 +197,7 @@ def plot_power_spectrum(
         ax[plot_idx].loglog(
             np.arange(1, amplitude_t.shape[0]),
             amplitude_t[1 : (amplitude_t.shape[0])],
-            label="Truth (ERA5)",
+            label="Truth (data)",
         )
         ax[plot_idx].loglog(
             np.arange(1, amplitude_p.shape[0]),
@@ -279,24 +279,34 @@ def plot_histogram(
     for plot_idx, (variable_idx, (variable_name, output_only)) in enumerate(parameters.items()):
         yt = y_true[..., variable_idx].squeeze()
         yp = y_pred[..., variable_idx].squeeze()
+        # postprocessed outputs so we need to handle possible NaNs
 
-        # Calculate the histogram
+        # Calculate the histogram and handle NaNs
         if output_only:
+            # histogram of true increment and predicted increment
             xt = x[..., variable_idx].squeeze() * int(output_only)
-            hist_yt, bins_yt = np.histogram((yt - xt), bins=100)
-            hist_yp, bins_yp = np.histogram((yp - xt), bins=100)
+            yt_xt = yt - xt
+            yp_xt = yp - xt
+            # enforce the same binning for both histograms
+            bin_min = min(np.nanmin(yt_xt), np.nanmin(yp_xt))
+            bin_max = max(np.nanmax(yt_xt), np.nanmax(yp_xt))
+            hist_yt, bins_yt = np.histogram(yt_xt[~np.isnan(yt_xt)], bins=100, range=[bin_min, bin_max])
+            hist_yp, bins_yp = np.histogram(yp_xt[~np.isnan(yp_xt)], bins=100, range=[bin_min, bin_max])
         else:
-            hist_yt, bins_yt = np.histogram(yt, bins=100)
-            hist_yp, bins_yp = np.histogram(yp, bins=100)
+            # enforce the same binning for both histograms
+            bin_min = min(np.nanmin(yt), np.nanmin(yp))
+            bin_max = max(np.nanmax(yt), np.nanmax(yp))
+            hist_yt, bins_yt = np.histogram(yt[~np.isnan(yt)], bins=100, range=[bin_min, bin_max])
+            hist_yp, bins_yp = np.histogram(yp[~np.isnan(yp)], bins=100, range=[bin_min, bin_max])
 
         # Visualization trick for tp
         if variable_name in {"tp", "cp"}:
             # in-place multiplication does not work here because variables are different numpy types
             hist_yt = hist_yt * bins_yt[:-1]
             hist_yp = hist_yp * bins_yp[:-1]
         # Plot the modified histogram
-        ax[plot_idx].bar(bins_yt[:-1], hist_yt, width=np.diff(bins_yt), color="blue", alpha=0.7, label="Truth (ERA5)")
-        ax[plot_idx].bar(bins_yp[:-1], hist_yp, width=np.diff(bins_yp), color="red", alpha=0.7, label="Anemoi")
+        ax[plot_idx].bar(bins_yt[:-1], hist_yt, width=np.diff(bins_yt), color="blue", alpha=0.7, label="Truth (data)")
+        ax[plot_idx].bar(bins_yp[:-1], hist_yp, width=np.diff(bins_yp), color="red", alpha=0.7, label="Predicted")
 
         ax[plot_idx].set_title(variable_name)
         ax[plot_idx].set_xlabel(variable_name)