Refactor def and normalizer

nomad-coe · Jan 29, 2025 · 0b39f46 · 0b39f46 · github-actions · Jan 29, 2025
1 parent 9232147
commit 0b39f46
Show file tree

Hide file tree

Showing 4 changed files with 252 additions and 131 deletions.
diff --git a/src/nomad_simulations/schema_packages/workflow/general.py b/src/nomad_simulations/schema_packages/workflow/general.py
@@ -1,103 +1,148 @@
 from nomad.datamodel import ArchiveSection, EntryArchive
 from nomad.datamodel.metainfo.workflow import Link, Task, Workflow
-from nomad.metainfo import SubSection
+from nomad.metainfo import Quantity, SchemaPackage, SubSection
 from structlog.stdlib import BoundLogger
 
+from nomad_simulations.schema_packages.model_method import ModelMethod
+from nomad_simulations.schema_packages.model_system import ModelSystem
+
 INCORRECT_N_TASKS = 'Incorrect number of tasks found.'
 
+m_package = SchemaPackage()
+
 
-class SimulationWorkflowMethod(ArchiveSection):
+class SimulationWorkflowModel(ArchiveSection):
     """
-    Base class for simulation workflow method sub-section definition.
+    Base class for simulation workflow model sub-section definition.
     """
 
-    pass
+    initial_system = Quantity(
+        type=ModelSystem,
+        description="""
+        Reference to the input model_system.
+        """,
+    )
+
+    initial_method = Quantity(
+        type=ModelMethod,
+        description="""
+        Reference to the input model_method.
+        """,
+    )
 
 
 class SimulationWorkflowResults(ArchiveSection):
     """
     Base class for simulation workflow results sub-section definition.
     """
 
-    pass
+    final_system = Quantity(
+        type=ModelSystem,
+        description="""
+        Reference to the final model_system.
+        """,
+    )
 
 
 class SimulationWorkflow(Workflow):
     """
     Base class for simulation workflows.
     """
 
-    method = SubSection(sub_section=SimulationWorkflowMethod.m_def)
+    model_label = 'Input model'
+
+    results_label = 'Output results'
+
+    model = SubSection(sub_section=SimulationWorkflowModel.m_def)
 
     results = SubSection(sub_section=SimulationWorkflowResults.m_def)
 
-    def normalize(self, archive: EntryArchive, logger: BoundLogger):
+    def generate_inputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        if not self.model:
+            self.model = SimulationWorkflowModel()
+        self.model.initial_method = archive.data.outputs[0].model_method_ref
+        self.model.initial_system = archive.data.outputs[0].model_system_ref
+
+        # set model as inputs
+        self.inputs.append(Link(name=self.model_label, section=self.model))
+
+    def generate_outputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        if not self.results:
+            self.results = SimulationWorkflowResults()
+        self.results.final_system = archive.data.outputs[-1].model_system_ref
+
+        # set results as outputs
+        self.outputs.append(Link(name=self.results_label, section=self.results))
+
+    def generate_tasks(self, archive: EntryArchive, logger: BoundLogger) -> None:
         """
-        Generate tasks from the archive data outputs.
+        Generate tasks from archive data outputs. Tasks are ordered and linked based
+        on the execution time of the calculation corresponding to the output.
+        By default, the tasks follow the order of the outputs and are linked sequentially.
         """
+        # default to serial execution
+        times: list[tuple[float, float]] = list(
+            [
+                (o.wall_start or n, o.wall_end or n)
+                for n, o in enumerate(archive.data.outputs)
+            ]
+        )
+        times.sort(key=lambda x: x[0])
+        # current parent task
+        parent_n = 0
+        parent_outputs: list[Link] = []
+        for n, time in enumerate(times):
+            task = Task(
+                outputs=[
+                    Link(
+                        name='Output',
+                        section=archive.data.outputs[n],
+                    )
+                ],
+            )
+            self.tasks.append(task)
+            # link tasks based on overlap in execution time
+            if time[0] >= times[parent_n][1]:
+                # if no overlap, assign outputs of parent as input to next task
+                task.inputs.extend(
+                    [
+                        Link(name='Input', section=output.section)
+                        for output in parent_outputs or task.outputs
+                    ]
+                )
+                # assign first parent outputs as workflow inputs
+                if not self.inputs:
+                    self.inputs.extend(task.inputs)
+                # assign as new parent
+                parent_n = n
+                # reset outputs
+                parent_outputs = list(task.outputs)
+            else:
+                parent_outputs.extend(task.outputs)
+                # if overlap, assign parent outputs to task inputs
+                task.inputs.extend(
+                    [
+                        Link(name='Input', section=output.section)
+                        for output in self.tasks[parent_n or n].outputs
+                    ]
+                )
+
+        if not self.outputs:
+            # assign parent outputs as workflow outputs
+            self.outputs.extend(parent_outputs)
+
+    def normalize(self, archive: EntryArchive, logger: BoundLogger):
         if not archive.data or not archive.data.outputs:
             return
 
-        # generate tasks from outputs
-        if not self.tasks:
-            # default should to serial execution
-            times: list[tuple[float, float]] = list(
-                [
-                    (o.wall_start or n, o.wall_end or n)
-                    for n, o in enumerate(archive.data.outputs)
-                ]
-            )
-            times.sort(key=lambda x: x[0])
-            # current parent task
-            parent_n = 0
-            parent_outputs: list[Link] = []
-            for n, time in enumerate(times):
-                task = Task(
-                    outputs=[
-                        Link(
-                            name='Output',
-                            section=archive.data.outputs[n],
-                        )
-                    ],
-                )
-                self.tasks.append(task)
-                # link tasks based on overlap in execution time
-                if time[0] >= times[parent_n][1]:
-                    # if no overlap, assign outputs of parent as input to next task
-                    task.inputs.extend(
-                        [
-                            Link(name='Input', section=output.section)
-                            for output in parent_outputs or task.outputs
-                        ]
-                    )
-                    # assign first parent outputs as workflow inputs
-                    if not self.inputs:
-                        self.inputs.extend(task.inputs)
-                    # assign as new parent
-                    parent_n = n
-                    # reset outputs
-                    parent_outputs = list(task.outputs)
-                else:
-                    parent_outputs.extend(task.outputs)
-                    # if overlap, assign parent outputs to task inputs
-                    task.inputs.extend(
-                        [
-                            Link(name='Input', section=output.section)
-                            for output in self.tasks[parent_n or n].outputs
-                        ]
-                    )
-            if not self.outputs:
-                # assign parent outputs as workflow outputs
-                self.outputs.extend(parent_outputs)
+        if not self.inputs:
+            self.generate_inputs(archive, logger)
 
-        if not self.method:
-            self.method = SimulationWorkflowMethod()
+        if not self.outputs:
+            self.generate_outputs(archive, logger)
 
-        if not self.results:
-            self.results = SimulationWorkflowResults()
+        if not self.tasks:
+            self.generate_tasks(archive, logger)
 
-        # set method as inputs
-        self.inputs.append(Link(name='Input method', section=self.method))
 
-        # set results as outputs
-        self.outputs.append(Link(name='Ouput results', section=self.results))
+m_package.__init_metainfo__()
diff --git a/src/nomad_simulations/schema_packages/workflow/geometry_optimization.py b/src/nomad_simulations/schema_packages/workflow/geometry_optimization.py
@@ -1,19 +1,18 @@
+import numpy as np
 from nomad.datamodel import EntryArchive
-from nomad.datamodel.metainfo.workflow import Link, Task
-from nomad.metainfo import MEnum, Quantity
-from nomad.metainfo.util import MSubSectionList
+from nomad.metainfo import MEnum, Quantity, SchemaPackage
 from structlog.stdlib import BoundLogger
 
-from nomad_simulations.schema_packages.outputs import Outputs
-
 from .general import (
     SimulationWorkflow,
-    SimulationWorkflowMethod,
+    SimulationWorkflowModel,
     SimulationWorkflowResults,
 )
 
+m_package = SchemaPackage()
+
 
-class GeometryOptimizationMethod(SimulationWorkflowMethod):
+class GeometryOptimizationModel(SimulationWorkflowModel):
     optimization_type = Quantity(
         type=MEnum('static', 'atomic', 'cell_shape', 'cell_volume'),
         shape=[],
@@ -94,51 +93,102 @@ class GeometryOptimizationMethod(SimulationWorkflowMethod):
         type=int,
         shape=[],
         description="""
-        The number of optimization steps between sucessive outputs.
+        The number of optimization steps between saved outputs.
         """,
     )
 
 
 class GeometryOptimizationResults(SimulationWorkflowResults):
-    pass
+    n_steps = Quantity(
+        type=int,
+        shape=[],
+        description="""
+        Number of saved optimization steps.
+        """,
+    )
+
+    energies = Quantity(
+        type=np.float64,
+        unit='joule',
+        shape=['optimization_steps'],
+        description="""
+        List of energy_total values gathered from the single configuration
+        calculations that are a part of the optimization trajectory.
+        """,
+    )
+
+    steps = Quantity(
+        type=np.int32,
+        shape=['optimization_steps'],
+        description="""
+        The step index corresponding to each saved configuration.
+        """,
+    )
+
+    final_energy_difference = Quantity(
+        type=np.float64,
+        shape=[],
+        unit='joule',
+        description="""
+        The difference in the energy_total between the last two steps during
+        optimization.
+        """,
+    )
+
+    final_force_maximum = Quantity(
+        type=np.float64,
+        shape=[],
+        unit='newton',
+        description="""
+        The maximum net force in the last optimization step.
+        """,
+    )
+
+    final_displacement_maximum = Quantity(
+        type=np.float64,
+        shape=[],
+        unit='meter',
+        description="""
+        The maximum displacement in the last optimization step with respect to previous.
+        """,
+    )
+
+    is_converged_geometry = Quantity(
+        type=bool,
+        shape=[],
+        description="""
+        Indicates if the geometry convergence criteria were fulfilled.
+        """,
+    )
 
 
 class GeometryOptimization(SimulationWorkflow):
     """
     Definitions for geometry optimization workflow.
     """
 
-    def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
-        """
-        Specify the inputs and outputs of the tasks as the model system.
-        """
+    task_label = 'Step'
 
-        # set up first method and results before we call base normalizer
-        if not self.method:
-            self.method = GeometryOptimizationMethod()
+    def generate_inputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        if not self.model:
+            self.model = GeometryOptimizationModel()
+        super().generate_inputs(archive, logger)
 
+    def generate_outputs(self, archive: EntryArchive, logger: BoundLogger):
         if not self.results:
             self.results = GeometryOptimizationResults()
+        super().generate_outputs(archive, logger)
 
-        super().normalize(archive, logger)
-
-        def extend_links(task: Task) -> None:
-            def get_system_links(links: MSubSectionList, name: str) -> list[Link]:
-                return [
-                    Link(name=name, section=link.section.model_system_ref)
-                    for link in links
-                    if isinstance(link.section, Outputs)
-                    and link.section.model_system_ref
-                ]
+    def generate_tasks(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        super().generate_tasks(archive, logger)
+        for n, task in enumerate(self.tasks):
+            if not task.name:
+                task.name = f'{self.task_label} {n}'
 
-            task.inputs.extend(get_system_links(self.inputs, 'Input system'))
-            task.outputs.extend(get_system_links(self.outputs, 'Output system'))
+        # link inputs to first task
+        self.tasks[0].inputs.extend(self.inputs)
+        # add outputs of last task to outputs
+        self.outputs.extend(self.tasks[-1].outputs)
 
-        if not self.name:
-            self.name = 'Geometry Optimization'
 
-        extend_links(self)
-        for n, task in enumerate(self.tasks):
-            if not task.name:
-                task.name = f'Step {n}'
-            extend_links(task)
+m_package.__init_metainfo__()
diff --git a/src/nomad_simulations/schema_packages/workflow/gw.py b/src/nomad_simulations/schema_packages/workflow/gw.py
@@ -1,8 +1,11 @@
 from nomad.datamodel import EntryArchive
+from nomad.metainfo import SchemaPackage
 from structlog.stdlib import BoundLogger
 
 from .general import INCORRECT_N_TASKS, SimulationWorkflow
 
+m_package = SchemaPackage()
+
 
 class DFTGWWorkflow(SimulationWorkflow):
     """
@@ -35,3 +38,6 @@ def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
         self.tasks[0].outputs = self.tasks[0].task.outputs
         self.tasks[1].inputs = self.tasks[0].outputs
         self.tasks[1].outputs = self.outputs
+
+
+m_package.__init_metainfo__()
File	Stmts	Miss	Cover	Missing
src/nomad_simulations
__init__.py	4	2	50%	3–4
_version.py	11	2	82%	5–6
src/nomad_simulations/schema_packages
__init__.py	15	2	87%	39–41
atoms_state.py	190	21	89%	13–15, 201–204, 228, 283–284, 352–353, 355, 537, 549–550, 611–615, 630–634, 641
basis_set.py	240	28	88%	8–9, 122–133, 172–185, 208, 391–395, 417–418, 462–465, 584, 615, 617
general.py	85	8	91%	4–7, 123, 143, 253–254, 264
model_method.py	269	78	71%	10–12, 171–174, 177–184, 276–277, 297, 318–339, 355–381, 384–401, 587, 780, 791, 833–840, 878, 897, 977, 1034, 1109, 1223
model_system.py	348	37	89%	45–51, 235, 254, 258, 261, 264, 290, 376–377, 454–455, 472–473, 686–689, 736–743, 917–918, 1140–1144, 1150–1151, 1159–1160, 1165, 1188
numerical_settings.py	259	61	76%	12–14, 217, 219–220, 223–226, 230–231, 238–241, 250–253, 257–260, 262–265, 270–273, 279–282, 469–496, 571, 606–609, 633, 636, 681, 683–686, 690, 694, 741, 745–766, 821–822, 889
outputs.py	120	10	92%	8–9, 253–256, 296–299, 324, 326, 363, 382
physical_property.py	102	7	93%	20–22, 202, 331–333
variables.py	86	12	86%	8–10, 98, 121, 145, 167, 189, 211, 233, 256, 276
src/nomad_simulations/schema_packages/properties
band_gap.py	51	5	90%	8–10, 135–136
band_structure.py	123	25	80%	9–11, 232–265, 278, 285, 321–322, 325, 372–373, 378
energies.py	42	9	79%	7–9, 36, 57, 82, 103, 119, 134
fermi_surface.py	17	4	76%	7–9, 40
forces.py	22	6	73%	7–9, 36, 56, 79
greens_function.py	99	13	87%	7–9, 210–211, 214, 235–236, 239, 260–261, 264, 400
hopping_matrix.py	29	5	83%	7–9, 58, 94
permittivity.py	48	8	83%	7–9, 97–105
spectral_profile.py	260	128	51%	9–11, 57–60, 95–98, 199–300, 356–368, 393–396, 416, 421–424, 466–502, 526, 573–576, 592–593, 598–604
thermodynamics.py	75	27	64%	7–9, 35, 56, 72, 81, 90, 101, 110, 137, 147, 157, 172–174, 177, 193, 213–215, 218, 234, 254–256, 259
src/nomad_simulations/schema_packages/utils
utils.py	79	16	80%	8–11, 65–74, 83–84, 89, 92, 169–170
src/nomad_simulations/schema_packages/workflow
__init__.py	4	4	0%	1–4
general.py	57	57	0%	1–148
geometry_optimization.py	41	41	0%	1–194
gw.py	22	22	0%	1–43
single_point.py	28	28	0%	1–60
TOTAL	2747	666	76%