mir-group · kavanase · Apr 12, 2024 · Apr 12, 2024 · Apr 12, 2024 · Apr 19, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,7 @@ Most recent change on the bottom.
 - alternate neighborlist support enabled with `NEQUIP_NL` environment variable, which can be set to `ase` (default), `matscipy` or `vesin`
 - Allow `n_train` and `n_val` to be specified as percentages of datasets.
 - Only attempt training restart if `trainer.pth` file present (prevents unnecessary crashes due to file-not-found errors in some cases)
+- Stratified metrics are now supported: errors can be stratified by reference value (in percent or raw units) or by error population.
 
 ### Changed
 - [Breaking] `NEQUIP_MATSCIPY_NL` environment variable no longer supported

diff --git a/CITATION.cff b/CITATION.cff
@@ -2,29 +2,50 @@ cff-version: "1.2.0"
 message: "If you use this software, please cite our article."
 authors:
 - family-names: Batzner
-    given-names: Simon
+  given-names: Simon
 - family-names: Musaelian
-    given-names: Albert
+  given-names: Albert
 - family-names: Sun
-    given-names: Lixin
+  given-names: Lixin
 - family-names: Geiger
-    given-names: Mario
+  given-names: Mario
 - family-names: Mailoa
-    given-names: Jonathan P.
+  given-names: Jonathan P.
 - family-names: Kornbluth
-    given-names: Mordechai
+  given-names: Mordechai
 - family-names: Molinari
-    given-names: Nicola
+  given-names: Nicola
 - family-names: Smidt
-    given-names: Tess E.
+  given-names: Tess E.
 - family-names: Kozinsky
-    given-names: Boris
+  given-names: Boris
 doi: 10.1038/s41467-022-29939-5
-date-published: 2022-05-04
-issn: 2041-1723
-journal: Nature Communications
-start: 2453
-title: "E(3)-equivariant graph neural networks for data-efficient and accurate interatomic potentials"
-type: article
-url: "https://www.nature.com/articles/s41467-022-29939-5"
-volume: 13
+preferred-citation:
+    authors:
+    - family-names: Batzner
+      given-names: Simon
+    - family-names: Musaelian
+      given-names: Albert
+    - family-names: Sun
+      given-names: Lixin
+    - family-names: Geiger
+      given-names: Mario
+    - family-names: Mailoa
+      given-names: Jonathan P.
+    - family-names: Kornbluth
+      given-names: Mordechai
+    - family-names: Molinari
+      given-names: Nicola
+    - family-names: Smidt
+      given-names: Tess E.
+    - family-names: Kozinsky
+      given-names: Boris
+    doi: 10.1038/s41467-022-29939-5
+    date-published: 2022-05-04
+    issn: 2041-1723
+    journal: Nature Communications
+    start: 2453
+    title: "E(3)-equivariant graph neural networks for data-efficient and accurate interatomic potentials"
+    type: article
+    url: "https://www.nature.com/articles/s41467-022-29939-5"
+    volume: 13
diff --git a/configs/full.yaml b/configs/full.yaml
@@ -281,12 +281,23 @@ metrics_components:
   - - forces                                
     - rmse                                  
     - PerSpecies: True                     
-      report_per_component: False    
+      report_per_component: False
   - - total_energy
     - mae    
   - - total_energy
     - mae
     - PerAtom: True                        # if true, energy is normalized by the number of atoms
+# we can also output errors stratified by the reference value ranges (in percent or absolute values), or by the error populations in percent:
+  - - total_energy
+    - mae
+    - stratify: 10%_range                 # stratify by range (in reference energies per atom), in increments of 10% (i.e. errors for first 10% lowest reference values, next 10% etc)
+      PerAtom: True
+  - - forces
+    - rmse
+    - stratify: 10%_population          # stratify by population (in force errors per atom), in increments of 10% (i.e. errors for the 10% lowest errors, the next 10%, etc.)
+  - - stress
+    - mae
+    - stratify: 0.001                  # stratify by absolute value (in reference stresses), in increments of 0.001
 
 # optimizer, may be any optimizer defined in torch.optim
 # the name `optimizer_name`is case sensitive

diff --git a/nequip/__init__.py b/nequip/__init__.py
@@ -1,3 +1,4 @@
+import os
 import sys
 
 from ._version import __version__  # noqa: F401
@@ -16,7 +17,10 @@
 ), f"NequIP supports PyTorch 1.11.* or 1.13.* or later, but {torch_version} found"
 
 # warn if using 1.13* or 2.0.*
-if packaging.version.parse("1.13.0") <= torch_version:
+if (
+    packaging.version.parse("1.13.0") <= torch_version
+    and int(os.environ.get("PYTORCH_VERSION_WARNING", 1)) != 0
+):
     warnings.warn(
         f"!! PyTorch version {torch_version} found. Upstream issues in PyTorch versions 1.13.* and 2.* have been seen to cause unusual performance degredations on some CUDA systems that become worse over time; see https://github.com/mir-group/nequip/discussions/311. The best tested PyTorch version to use with CUDA devices is 1.11; while using other versions if you observe this problem, an unexpected lack of this problem, or other strange behavior, please post in the linked GitHub issue."
     )

diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
@@ -455,11 +455,13 @@ def main(args=None, running_as_script: bool = True):
 
     if do_metrics:
         logger.info("\n--- Final result: ---")
-        logger.critical(
+        logger.info(
             "\n".join(
-                f"{k:>20s} = {v:< 20f}"
+                f"{k:>30s} = {v:< 30f}"
                 for k, v in metrics.flatten_metrics(
-                    metrics.current_result(),
+                    metrics.current_result(
+                        verbose=True
+                    ),  # verbose output about strata on final call
                     type_names=dataset.type_mapper.type_names,
                 )[0].items()
             )