Refactor thermal conductivity metrics and calculation modules

- rename mode_kappa_tot to mode_kappa_tot_rta in MbdKey enums
- reinstate more flexible tiered mode kappa computation in calc_kappa_srme, i.e. support calculating mode kappa from components (kappa_p_rta, kappa_c, heat_capacity)
- add attribution for ported code in phonons/thermal_conductivity.py and metrics/phonons.py module docstrings
- update test cases to reflect fixed metric calculation logic
janosh · Jan 29, 2025 · d2ef1e9 · d2ef1e9
1 parent 4285630
commit d2ef1e9
Show file tree

Hide file tree

Showing 9 changed files with 478 additions and 66 deletions.
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -311,7 +311,9 @@ class DataFiles(Files):
     phonondb_pbe_103_structures = (
         "phonons/2024-11-09-phononDB-PBE-103-structures.extxyz"
     )
-    phonondb_pbe_103_kappa_nac = "phonons/2024-11-09-kappas-phononDB-PBE-noNAC.json.gz"
+    phonondb_pbe_103_kappa_no_nac = (
+        "phonons/2024-11-09-kappas-phononDB-PBE-noNAC.json.gz"
+    )
 
     @functools.cached_property
     def yaml(self) -> dict[str, dict[str, str]]:

diff --git a/matbench_discovery/enums.py b/matbench_discovery/enums.py
@@ -82,7 +82,10 @@ class MbdKey(LabelEnum):
     kappa_tot_avg = "kappa_tot_avg", "Average total thermal conductivity"
     kappa_p_rta = "kappa_p_rta", "Particle-like thermal conductivity (RTA)"
     kappa_c = "kappa_c", "Thermal conductivity correction"
-    mode_kappa_tot = "mode_kappa_tot", "Mode-resolved total thermal conductivity"
+    mode_kappa_tot_rta = (
+        "mode_kappa_tot_rta",
+        "Mode-resolved total thermal conductivity (RTA)",
+    )
     mode_kappa_tot_avg = (
         "mode_kappa_tot_avg",
         "Mode-resolved average thermal conductivity",

diff --git a/matbench_discovery/metrics/phonons.py b/matbench_discovery/metrics/phonons.py
@@ -8,6 +8,13 @@
   metric that is particularly useful since it's not subject to error cancellation.
   Overpredictions of kappa-contributions from one mode will not cancel against
   underpredictions from another mode.
+
+Code in this module is adapted from https://github.com/MPA2suite/k_SRME/blob/6ff4c867/k_srme/benchmark.py.
+All credit to Balázs Póta, Paramvir Ahlawat, Gábor Csányi, Michele Simoncelli. See
+https://arxiv.org/abs/2408.00755 for details.
+It was ported to this repo in https://github.com/janosh/matbench-discovery/pull/196 to
+implement parallelization across input structures which allows scaling thermal
+conductivity metric to larger test sets.
 """
 
 import traceback
@@ -18,6 +25,7 @@
 from pymatviz.enums import Key
 
 from matbench_discovery.enums import MbdKey
+from matbench_discovery.phonons import thermal_conductivity as ltc
 
 
 def calc_kappa_metrics_from_dfs(
@@ -51,9 +59,6 @@ def calc_kappa_metrics_from_dfs(
     df_pred[MbdKey.kappa_tot_avg] = df_pred[MbdKey.kappa_tot_rta].map(
         calculate_kappa_avg
     )
-    df_pred[MbdKey.mode_kappa_tot_avg] = df_pred[MbdKey.mode_kappa_tot].map(
-        calculate_kappa_avg
-    )
 
     df_pred[Key.srd] = (
         2
@@ -64,7 +69,7 @@ def calc_kappa_metrics_from_dfs(
     # turn temperature list to the first temperature (300K) TODO: allow multiple
     # temperatures to be tested
     df_pred[Key.srd] = df_pred[Key.srd].map(
-        lambda x: x[0] if not isinstance(x, float) else x
+        lambda x: x if isinstance(x, float) else x[0]
     )
 
     # We substitute NaN values with 0 predicted conductivity, yielding -2 for SRD
@@ -76,38 +81,36 @@ def calc_kappa_metrics_from_dfs(
 
     df_pred[MbdKey.true_kappa_tot_avg] = df_true[MbdKey.kappa_tot_avg]
 
-    cols_to_remove = [MbdKey.mode_kappa_tot]
-    return df_pred.drop(columns=cols_to_remove, errors="ignore")
+    return df_pred
 
 
 def calculate_kappa_avg(kappa: np.ndarray) -> np.ndarray:
-    """Calculate the average thermal conductivity from the conductivity tensor.
+    """Calculate directionally averaged trace of the conductivity tensor obtained from
+    the Wigner transport equation (WTE) solution in the relaxation-time approximation.
 
-    Takes a thermal conductivity tensor and computes its trace (average of diagonal
-    components). This represents the average thermal conductivity in all directions,
-    which is a useful scalar metric for comparing materials.
+    Takes a thermal conductivity tensor and returns its trace (average of diagonal
+    components). This represents the average thermal conductivity in the 3 spatial
+    directions, which is a useful scalar metric for comparing materials.
 
     Args:
         kappa: Thermal conductivity tensor, typically of shape (..., 3, 3) where
             the last two dimensions represent the 3x3 conductivity tensor.
             Earlier dimensions may include temperatures or other parameters.
 
     Returns:
-        Average conductivity value(s). Returns np.nan if the input contains
+        np.ndarray: Average conductivity value(s). Returns np.nan if the input contains
         any NaN values or if the calculation fails. For multiple temperatures,
         returns an array of averages.
     """
-    if np.any(pd.isna(kappa)):
-        return np.nan
-    kappa = np.asarray(kappa)
-
+    if np.any(np.isnan(kappa)):
+        return np.array([np.nan])
     try:
-        return kappa[..., :3].mean(axis=-1)
+        return np.asarray(kappa)[..., :3].mean(axis=-1)
     except Exception:
         warnings.warn(
             f"Failed to calculate kappa_avg: {traceback.format_exc()}", stacklevel=2
         )
-        return np.nan
+        return np.array([np.nan])
 
 
 def calc_kappa_srme_dataframes(
@@ -186,23 +189,45 @@ def calc_kappa_srme(kappas_pred: pd.Series, kappas_true: pd.Series) -> np.ndarra
         - 2 indicates complete disagreement or invalid results
         - Returns [2] for various error conditions (missing data, NaN values)
     """
-    if np.all(pd.isna(kappas_pred[MbdKey.kappa_tot_avg])):
+    if np.any(np.isnan(kappas_true[MbdKey.kappa_tot_avg])):
+        raise ValueError("found NaNs in kappa_tot_avg reference values")
+    if (  # return highest possible SRME=2 if any of these conditions are met:
+        # only have NaN averaged kappa preds
+        np.all(np.isnan(kappas_pred[MbdKey.kappa_tot_avg]))
+        # some mode-resolved kappa preds are NaN
+        or np.any(np.isnan(kappas_pred[MbdKey.kappa_tot_rta]))
+        # some mode weights are NaN
+        or np.any(np.isnan(kappas_pred[Key.mode_weights]))
+    ):
         return [2]
-    if np.any(pd.isna(kappas_pred[MbdKey.kappa_tot_rta])):
-        return [2]  # np.nan
-    if np.any(pd.isna(kappas_pred[Key.mode_weights])):
-        return [2]  # np.nan
-    if np.any(pd.isna(kappas_true[MbdKey.kappa_tot_avg])):
-        return [2]  # np.nan
-
-    mode_kappa_tot_avg_pred = calculate_kappa_avg(kappas_pred[MbdKey.kappa_tot_rta])
-    mode_kappa_tot_avg_true = calculate_kappa_avg(kappas_true[MbdKey.kappa_tot_avg])
+
+    mode_kappa_tot_avgs = {}  # store results for pred and true
+    # Try different data sources in order of preference for both pred and true data
+    for label, kappas in {"preds": kappas_pred, "true": kappas_true}.items():
+        keys = set(kappas.keys())
+        if MbdKey.mode_kappa_tot_avg in kappas:
+            kappas = kappas[MbdKey.mode_kappa_tot_avg]
+        elif MbdKey.mode_kappa_tot_rta in kappas:
+            kappas = calculate_kappa_avg(kappas[MbdKey.mode_kappa_tot_rta])
+        elif {MbdKey.kappa_p_rta, MbdKey.kappa_c, Key.heat_capacity} <= keys:
+            kappas = calculate_kappa_avg(
+                ltc.calc_mode_kappa_tot(
+                    kappas[MbdKey.kappa_p_rta],
+                    kappas[MbdKey.kappa_c],
+                    kappas[Key.heat_capacity],
+                )
+            )
+        else:
+            raise ValueError(
+                f"Neither mode_kappa_tot_avg, mode_kappa_tot nor individual kappa\n"
+                f"components found in {label}, got\n{keys}"
+            )
+        mode_kappa_tot_avgs[label] = kappas
+
     # calculating microscopic error for all temperatures
     microscopic_error = (
-        np.abs(
-            mode_kappa_tot_avg_pred - mode_kappa_tot_avg_true  # reduce ndim by 1
-        ).sum(  # summing axes
-            axis=tuple(range(1, np.asarray(mode_kappa_tot_avg_pred).ndim))
+        np.abs(mode_kappa_tot_avgs["preds"] - mode_kappa_tot_avgs["true"]).sum(
+            axis=tuple(range(1, np.asarray(mode_kappa_tot_avgs["preds"]).ndim))
         )
         / np.asarray(kappas_pred[Key.mode_weights]).sum()
     )

diff --git a/matbench_discovery/phonons/thermal_conductivity.py b/matbench_discovery/phonons/thermal_conductivity.py
@@ -3,6 +3,9 @@
 Code is adapted from https://github.com/MPA2suite/k_SRME/blob/6ff4c867/k_srme/conductivity.py.
 All credit to Balázs Póta, Paramvir Ahlawat, Gábor Csányi, Michele Simoncelli. See
 https://arxiv.org/abs/2408.00755 for details.
+It was ported to this repo in https://github.com/janosh/matbench-discovery/pull/196 to
+implement parallelization across input structures which allows scaling thermal
+conductivity metric to larger test sets.
 """
 
 import warnings
@@ -246,7 +249,7 @@ def calculate_conductivity(
         Key.q_points: deepcopy(kappa.qpoints),
         Key.ph_freqs: deepcopy(kappa.frequencies),
     }
-    mode_kappa_total = kappa_dict[MbdKey.mode_kappa_tot] = calculate_mode_kappa_tot(
+    mode_kappa_total = kappa_dict[MbdKey.mode_kappa_tot_rta] = calc_mode_kappa_tot(
         deepcopy(kappa.mode_kappa_P_RTA[0]),
         deepcopy(kappa.mode_kappa_C[0]),
         deepcopy(kappa.mode_heat_capacities),
@@ -267,7 +270,7 @@ def calculate_conductivity(
     return ph3, kappa_dict, kappa
 
 
-def calculate_mode_kappa_tot(
+def calc_mode_kappa_tot(
     mode_kappa_p_rta: np.ndarray,
     mode_kappa_coherence: np.ndarray,
     heat_capacity: np.ndarray,