Skip to content

Commit

Permalink
Refactor def and normalizer
Browse files Browse the repository at this point in the history
  • Loading branch information
ladinesa committed Jan 29, 2025
1 parent 9232147 commit 0b39f46
Show file tree
Hide file tree
Showing 4 changed files with 252 additions and 131 deletions.
179 changes: 112 additions & 67 deletions src/nomad_simulations/schema_packages/workflow/general.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,148 @@
from nomad.datamodel import ArchiveSection, EntryArchive
from nomad.datamodel.metainfo.workflow import Link, Task, Workflow
from nomad.metainfo import SubSection
from nomad.metainfo import Quantity, SchemaPackage, SubSection
from structlog.stdlib import BoundLogger

from nomad_simulations.schema_packages.model_method import ModelMethod
from nomad_simulations.schema_packages.model_system import ModelSystem

INCORRECT_N_TASKS = 'Incorrect number of tasks found.'

m_package = SchemaPackage()


class SimulationWorkflowMethod(ArchiveSection):
class SimulationWorkflowModel(ArchiveSection):
"""
Base class for simulation workflow method sub-section definition.
Base class for simulation workflow model sub-section definition.
"""

pass
initial_system = Quantity(
type=ModelSystem,
description="""
Reference to the input model_system.
""",
)

initial_method = Quantity(
type=ModelMethod,
description="""
Reference to the input model_method.
""",
)


class SimulationWorkflowResults(ArchiveSection):
"""
Base class for simulation workflow results sub-section definition.
"""

pass
final_system = Quantity(
type=ModelSystem,
description="""
Reference to the final model_system.
""",
)


class SimulationWorkflow(Workflow):
"""
Base class for simulation workflows.
"""

method = SubSection(sub_section=SimulationWorkflowMethod.m_def)
model_label = 'Input model'

results_label = 'Output results'

model = SubSection(sub_section=SimulationWorkflowModel.m_def)

results = SubSection(sub_section=SimulationWorkflowResults.m_def)

def normalize(self, archive: EntryArchive, logger: BoundLogger):
def generate_inputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
# Fill the workflow `model` section and register it as a workflow input link.
# Reads archive.data.outputs[0], so outputs must be non-empty here; `normalize`
# returns early when archive.data or archive.data.outputs is missing.
# NOTE(review): indentation is not visible in this view, so whether the two
# assignments below are guarded by the `if` cannot be confirmed from here.
if not self.model:
self.model = SimulationWorkflowModel()
# initial method and system references come from the first output
self.model.initial_method = archive.data.outputs[0].model_method_ref
self.model.initial_system = archive.data.outputs[0].model_system_ref

# add the model section to the workflow inputs, labelled with `model_label`
self.inputs.append(Link(name=self.model_label, section=self.model))

def generate_outputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
# Fill the workflow `results` section and register it as a workflow output link.
# Reads archive.data.outputs[-1], so outputs must be non-empty here; `normalize`
# returns early when archive.data or archive.data.outputs is missing.
# NOTE(review): indentation is not visible in this view, so whether the
# final_system assignment is guarded by the `if` cannot be confirmed from here.
if not self.results:
self.results = SimulationWorkflowResults()
# final system reference comes from the last output
self.results.final_system = archive.data.outputs[-1].model_system_ref

# add the results section to the workflow outputs, labelled with `results_label`
self.outputs.append(Link(name=self.results_label, section=self.results))

def generate_tasks(self, archive: EntryArchive, logger: BoundLogger) -> None:
    """
    Generate tasks from archive data outputs. Tasks are ordered and linked based
    on the execution time of the calculation corresponding to the output.
    By default, the tasks follow the order of the outputs and are linked sequentially.

    One task is appended to `self.tasks` per entry of `archive.data.outputs`.
    A task that starts at or after the end of the current parent task takes the
    accumulated parent outputs as its inputs and becomes the new parent. A task
    that overlaps in time with the parent adds its outputs to the accumulated
    parent outputs. Workflow inputs and outputs are only filled if still empty.

    Args:
        archive: Entry archive whose `data.outputs` are turned into tasks.
        logger: Logger passed in by the normalizer; not used here.
    """
    # Keep each output paired with its own (start, end) times so that sorting by
    # start time cannot detach an output from its timing. When a time is missing,
    # the output index is used instead, which defaults to serial execution.
    timed_outputs = sorted(
        (
            (output.wall_start or n, output.wall_end or n, output)
            for n, output in enumerate(archive.data.outputs)
        ),
        key=lambda item: item[0],
    )
    # index (in execution order) of the current parent task
    parent_n = 0
    parent_outputs: list[Link] = []
    for n, (start, _end, output) in enumerate(timed_outputs):
        task = Task(
            outputs=[
                Link(
                    name='Output',
                    section=output,
                )
            ],
        )
        self.tasks.append(task)
        # link tasks based on overlap in execution time
        if start >= timed_outputs[parent_n][1]:
            # if no overlap, assign outputs of parent as input to next task
            task.inputs.extend(
                [
                    Link(name='Input', section=link.section)
                    for link in parent_outputs or task.outputs
                ]
            )
            # assign first parent outputs as workflow inputs
            if not self.inputs:
                self.inputs.extend(task.inputs)
            # assign as new parent
            parent_n = n
            # reset outputs
            parent_outputs = list(task.outputs)
        else:
            parent_outputs.extend(task.outputs)
            # if overlap, assign parent outputs to task inputs
            # NOTE(review): `parent_n or n` falls back to the current task when
            # the parent index is 0 -- confirm this is intended.
            task.inputs.extend(
                [
                    Link(name='Input', section=link.section)
                    for link in self.tasks[parent_n or n].outputs
                ]
            )

    if not self.outputs:
        # assign parent outputs as workflow outputs
        self.outputs.extend(parent_outputs)

def normalize(self, archive: EntryArchive, logger: BoundLogger):
if not archive.data or not archive.data.outputs:
return

# generate tasks from outputs
if not self.tasks:
# default should to serial execution
times: list[tuple[float, float]] = list(
[
(o.wall_start or n, o.wall_end or n)
for n, o in enumerate(archive.data.outputs)
]
)
times.sort(key=lambda x: x[0])
# current parent task
parent_n = 0
parent_outputs: list[Link] = []
for n, time in enumerate(times):
task = Task(
outputs=[
Link(
name='Output',
section=archive.data.outputs[n],
)
],
)
self.tasks.append(task)
# link tasks based on overlap in execution time
if time[0] >= times[parent_n][1]:
# if no overlap, assign outputs of parent as input to next task
task.inputs.extend(
[
Link(name='Input', section=output.section)
for output in parent_outputs or task.outputs
]
)
# assign first parent outputs as workflow inputs
if not self.inputs:
self.inputs.extend(task.inputs)
# assign as new parent
parent_n = n
# reset outputs
parent_outputs = list(task.outputs)
else:
parent_outputs.extend(task.outputs)
# if overlap, assign parent outputs to task inputs
task.inputs.extend(
[
Link(name='Input', section=output.section)
for output in self.tasks[parent_n or n].outputs
]
)
if not self.outputs:
# assign parent outputs as workflow outputs
self.outputs.extend(parent_outputs)
if not self.inputs:
self.generate_inputs(archive, logger)

if not self.method:
self.method = SimulationWorkflowMethod()
if not self.outputs:
self.generate_outputs(archive, logger)

if not self.results:
self.results = SimulationWorkflowResults()
if not self.tasks:
self.generate_tasks(archive, logger)

# set method as inputs
self.inputs.append(Link(name='Input method', section=self.method))

# set results as outputs
self.outputs.append(Link(name='Ouput results', section=self.results))
m_package.__init_metainfo__()
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import numpy as np
from nomad.datamodel import EntryArchive
from nomad.datamodel.metainfo.workflow import Link, Task
from nomad.metainfo import MEnum, Quantity
from nomad.metainfo.util import MSubSectionList
from nomad.metainfo import MEnum, Quantity, SchemaPackage
from structlog.stdlib import BoundLogger

from nomad_simulations.schema_packages.outputs import Outputs

from .general import (
SimulationWorkflow,
SimulationWorkflowMethod,
SimulationWorkflowModel,
SimulationWorkflowResults,
)

m_package = SchemaPackage()


class GeometryOptimizationMethod(SimulationWorkflowMethod):
class GeometryOptimizationModel(SimulationWorkflowModel):
optimization_type = Quantity(
type=MEnum('static', 'atomic', 'cell_shape', 'cell_volume'),
shape=[],
Expand Down Expand Up @@ -94,51 +93,102 @@ class GeometryOptimizationMethod(SimulationWorkflowMethod):
type=int,
shape=[],
description="""
The number of optimization steps between sucessive outputs.
The number of optimization steps between saved outputs.
""",
)


class GeometryOptimizationResults(SimulationWorkflowResults):
pass
n_steps = Quantity(
type=int,
shape=[],
description="""
Number of saved optimization steps.
""",
)

energies = Quantity(
type=np.float64,
unit='joule',
shape=['optimization_steps'],
description="""
List of energy_total values gathered from the single configuration
calculations that are a part of the optimization trajectory.
""",
)

steps = Quantity(
type=np.int32,
shape=['optimization_steps'],
description="""
The step index corresponding to each saved configuration.
""",
)

final_energy_difference = Quantity(
type=np.float64,
shape=[],
unit='joule',
description="""
The difference in the energy_total between the last two steps during
optimization.
""",
)

final_force_maximum = Quantity(
type=np.float64,
shape=[],
unit='newton',
description="""
The maximum net force in the last optimization step.
""",
)

final_displacement_maximum = Quantity(
type=np.float64,
shape=[],
unit='meter',
description="""
The maximum displacement in the last optimization step with respect to previous.
""",
)

is_converged_geometry = Quantity(
type=bool,
shape=[],
description="""
Indicates if the geometry convergence criteria were fulfilled.
""",
)


class GeometryOptimization(SimulationWorkflow):
"""
Definitions for geometry optimization workflow.
"""

def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
"""
Specify the inputs and outputs of the tasks as the model system.
"""
task_label = 'Step'

# set up first method and results before we call base normalizer
if not self.method:
self.method = GeometryOptimizationMethod()
def generate_inputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
# Use the geometry-optimization specific model section when none is set yet,
# then delegate to the base class implementation of generate_inputs.
if not self.model:
self.model = GeometryOptimizationModel()
super().generate_inputs(archive, logger)

def generate_outputs(self, archive: EntryArchive, logger: BoundLogger):
# Use the geometry-optimization specific results section when none is set yet,
# then delegate to the base class implementation of generate_outputs.
if not self.results:
self.results = GeometryOptimizationResults()
super().generate_outputs(archive, logger)

super().normalize(archive, logger)

def extend_links(task: Task) -> None:
def get_system_links(links: MSubSectionList, name: str) -> list[Link]:
return [
Link(name=name, section=link.section.model_system_ref)
for link in links
if isinstance(link.section, Outputs)
and link.section.model_system_ref
]
def generate_tasks(self, archive: EntryArchive, logger: BoundLogger) -> None:
super().generate_tasks(archive, logger)
for n, task in enumerate(self.tasks):
if not task.name:
task.name = f'{self.task_label} {n}'

task.inputs.extend(get_system_links(self.inputs, 'Input system'))
task.outputs.extend(get_system_links(self.outputs, 'Output system'))
# link inputs to first task
self.tasks[0].inputs.extend(self.inputs)
# add outputs of last task to outputs
self.outputs.extend(self.tasks[-1].outputs)

if not self.name:
self.name = 'Geometry Optimization'

extend_links(self)
for n, task in enumerate(self.tasks):
if not task.name:
task.name = f'Step {n}'
extend_links(task)
m_package.__init_metainfo__()
6 changes: 6 additions & 0 deletions src/nomad_simulations/schema_packages/workflow/gw.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from nomad.datamodel import EntryArchive
from nomad.metainfo import SchemaPackage
from structlog.stdlib import BoundLogger

from .general import INCORRECT_N_TASKS, SimulationWorkflow

m_package = SchemaPackage()


class DFTGWWorkflow(SimulationWorkflow):
"""
Expand Down Expand Up @@ -35,3 +38,6 @@ def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
self.tasks[0].outputs = self.tasks[0].task.outputs
self.tasks[1].inputs = self.tasks[0].outputs
self.tasks[1].outputs = self.outputs


m_package.__init_metainfo__()
Loading

1 comment on commit 0b39f46

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File | Stmts | Miss | Cover | Missing
src/nomad_simulations
   __init__.py | 4 | 2 | 50% | 3–4
   _version.py | 11 | 2 | 82% | 5–6
src/nomad_simulations/schema_packages
   __init__.py | 15 | 2 | 87% | 39–41
   atoms_state.py | 190 | 21 | 89% | 13–15, 201–204, 228, 283–284, 352–353, 355, 537, 549–550, 611–615, 630–634, 641
   basis_set.py | 240 | 28 | 88% | 8–9, 122–133, 172–185, 208, 391–395, 417–418, 462–465, 584, 615, 617
   general.py | 85 | 8 | 91% | 4–7, 123, 143, 253–254, 264
   model_method.py | 269 | 78 | 71% | 10–12, 171–174, 177–184, 276–277, 297, 318–339, 355–381, 384–401, 587, 780, 791, 833–840, 878, 897, 977, 1034, 1109, 1223
   model_system.py | 348 | 37 | 89% | 45–51, 235, 254, 258, 261, 264, 290, 376–377, 454–455, 472–473, 686–689, 736–743, 917–918, 1140–1144, 1150–1151, 1159–1160, 1165, 1188
   numerical_settings.py | 259 | 61 | 76% | 12–14, 217, 219–220, 223–226, 230–231, 238–241, 250–253, 257–260, 262–265, 270–273, 279–282, 469–496, 571, 606–609, 633, 636, 681, 683–686, 690, 694, 741, 745–766, 821–822, 889
   outputs.py | 120 | 10 | 92% | 8–9, 253–256, 296–299, 324, 326, 363, 382
   physical_property.py | 102 | 7 | 93% | 20–22, 202, 331–333
   variables.py | 86 | 12 | 86% | 8–10, 98, 121, 145, 167, 189, 211, 233, 256, 276
src/nomad_simulations/schema_packages/properties
   band_gap.py | 51 | 5 | 90% | 8–10, 135–136
   band_structure.py | 123 | 25 | 80% | 9–11, 232–265, 278, 285, 321–322, 325, 372–373, 378
   energies.py | 42 | 9 | 79% | 7–9, 36, 57, 82, 103, 119, 134
   fermi_surface.py | 17 | 4 | 76% | 7–9, 40
   forces.py | 22 | 6 | 73% | 7–9, 36, 56, 79
   greens_function.py | 99 | 13 | 87% | 7–9, 210–211, 214, 235–236, 239, 260–261, 264, 400
   hopping_matrix.py | 29 | 5 | 83% | 7–9, 58, 94
   permittivity.py | 48 | 8 | 83% | 7–9, 97–105
   spectral_profile.py | 260 | 128 | 51% | 9–11, 57–60, 95–98, 199–300, 356–368, 393–396, 416, 421–424, 466–502, 526, 573–576, 592–593, 598–604
   thermodynamics.py | 75 | 27 | 64% | 7–9, 35, 56, 72, 81, 90, 101, 110, 137, 147, 157, 172–174, 177, 193, 213–215, 218, 234, 254–256, 259
src/nomad_simulations/schema_packages/utils
   utils.py | 79 | 16 | 80% | 8–11, 65–74, 83–84, 89, 92, 169–170
src/nomad_simulations/schema_packages/workflow
   __init__.py | 4 | 4 | 0% | 1–4
   general.py | 57 | 57 | 0% | 1–148
   geometry_optimization.py | 41 | 41 | 0% | 1–194
   gw.py | 22 | 22 | 0% | 1–43
   single_point.py | 28 | 28 | 0% | 1–60
TOTAL | 2747 | 666 | 76% |

Tests Skipped Failures Errors Time
402 0 💤 0 ❌ 0 🔥 6.639s ⏱️

Please sign in to comment.