diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..254a308 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,91 @@ +name: ci + +on: + push: + # only pushes to main trigger + branches: [main] + pull_request: + # always triggered + +jobs: + + tests: + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + matrix: + python-version: ['3.12'] + aiida-version: ['stable'] + + services: + postgres: + image: postgres:10 + env: + POSTGRES_DB: test_aiida + POSTGRES_PASSWORD: '' + POSTGRES_HOST_AUTH_METHOD: trust + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + rabbitmq: + image: rabbitmq:latest + ports: + - 5672:5672 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install project manager + run: | + pip install hatch pytest pytest-cov + - name: Run test suite + env: + PYTEST_ADDOPTS: "--durations=0" + AIIDA_WARN_v3: 1 + run: | + hatch test --cover + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + + + docs: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install project manager + run: | + pip install hatch + - name: Build docs + run: | + hatch run docs:build + + static-analysis: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install project manager + run: | + pip install hatch + - name: Run formatter and linter + run: | + hatch fmt --check diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..d1b5b40 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,40 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: + python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4d839fd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: +- repo: local + hooks: + - id: format + name: format + entry: hatch fmt -f + language: system + types: [python] + - id: lint + name: lint + entry: hatch fmt -l + language: system + types: [python] diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..c8acc97 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,24 @@ +version: 2 + +build: + os: "ubuntu-22.04" + tools: + python: "miniconda3-3.12-24.1" # note that libmamba-solver is available since 22.1 + nodejs: "20" # maybe need to be also miniconda + jobs: + post_create_environment: + - python -m pip install --no-cache-dir .[docs] + - python -m pip install --exists-action=w --no-cache-dir -r docs/requirements.txt + - rabbitmq-server -detached + - sleep 5 + - rabbitmq-diagnostics status + - verdi presto + - verdi daemon start + - verdi status + +conda: + environment: docs/environment.yml + +# Build from the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py diff --git a/README.md b/README.md index 2c58f26..d8596b9 100644 --- a/README.md +++ b/README.md @@ -1 +1,31 @@ -# aiida-pythonjob \ No newline at end of file +# AiiDA-PythonJob +[![PyPI version](https://badge.fury.io/py/aiida-pythonjob.svg)](https://badge.fury.io/py/aiida-pythonjob) +[![Unit test](https://github.com/aiidateam/aiida-pythonjob/actions/workflows/ci.yml/badge.svg)](https://github.com/aiidateam/aiida-pythonjob/actions/workflows/ci.yml) +[![codecov](https://codecov.io/gh/aiidateam/aiida-pythonjob/branch/main/graph/badge.svg)](https://codecov.io/gh/aiidateam/aiida-pythonjob) +[![Docs status](https://readthedocs.org/projects/aiida-pythonjob/badge)](http://aiida-pythonjob.readthedocs.io/) + +Efficiently design and manage flexible workflows with AiiDA, featuring an interactive GUI, checkpoints, provenance tracking, error-resistant, and remote execution capabilities. + + + +## Installation + +```console + pip install aiida-pythonjob +``` + +To install the latest version from source, first clone the repository and then install using `pip`: + +```console +git clone https://github.com/aiidateam/aiida-pythonjob +cd aiida-pythonjob +pip install -e . +``` + + +## Documentation +Explore the comprehensive [documentation](https://aiida-pythonjob.readthedocs.io/en/latest/) to discover all the features and capabilities of AiiDA Workgraph. + + +## License +[MIT](http://opensource.org/licenses/MIT) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. 
+SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/environment.yml b/docs/environment.yml new file mode 100644 index 0000000..a01659b --- /dev/null +++ b/docs/environment.yml @@ -0,0 +1,7 @@ +name: base +channels: + - conda-forge + - defaults +dependencies: + - aiida-core + - aiida-core.services diff --git a/docs/gallery/autogen/GALLERY_HEADER.rst b/docs/gallery/autogen/GALLERY_HEADER.rst new file mode 100644 index 0000000..12385f4 --- /dev/null +++ b/docs/gallery/autogen/GALLERY_HEADER.rst @@ -0,0 +1,3 @@ +=========== +Quick Start +=========== diff --git a/docs/gallery/autogen/how_to.py b/docs/gallery/autogen/how_to.py new file mode 100644 index 0000000..4329dc5 --- /dev/null +++ b/docs/gallery/autogen/how_to.py @@ -0,0 +1,311 @@ +""" +How to guides +=============== + +""" + + +###################################################################### +# Introduction +# ------------ +# +# To run this tutorial, you need to load the AiiDA profile. +# + +from aiida import load_profile + +load_profile() + + +###################################################################### +# Default outputs +# -------------- +# +# The default output of the function is `result`. The `PythonJob` task +# will store the result as one node in the database with the key `result`. +# +from aiida.engine import run_get_node # noqa: E402 +from aiida_pythonjob import PythonJob, prepare_pythonjob_inputs # noqa: E402 + + +def add(x, y): + return x + y + + +inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + computer="localhost", +) +result, node = run_get_node(PythonJob, inputs=inputs) +print("result: ", result["result"]) + +###################################################################### +# Custom outputs +# -------------- +# If the function return a dictionary with fixed number of keys, and you +# want to store the values as separate outputs, you can specify the `function_outputs` parameter. +# For a dynamic number of outputs, you can use the namespace output, which is explained later. +# + + +def add(x, y): + return {"sum": x + y, "diff": x - y} + + +inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + function_outputs=[ + {"name": "sum"}, + {"name": "diff"}, + ], +) +result, node = run_get_node(PythonJob, **inputs) + +print("result: ") +print("sum: ", result["sum"]) +print("diff: ", result["diff"]) + + +###################################################################### +# Using parent folder +# -------------- +# The parent_folder parameter allows a task to access the output files of +# a parent task. This feature is particularly useful when you want to reuse +# data generated by a previous computation in subsequent computations. In +# the following example, the multiply task uses the `result.txt` file created by the add task. 
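+# (In this example, the contents of the parent task's remote folder are made
+# available under the `parent_folder/` sub-directory of the working directory,
+# which is why `multiply` reads from `parent_folder/result.txt`.)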
+# +# + + +def add(x, y): + z = x + y + with open("result.txt", "w") as f: + f.write(str(z)) + return x + y + + +def multiply(x, y): + with open("parent_folder/result.txt", "r") as f: + z = int(f.read()) + return x * y + z + + +inputs1 = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + function_outputs=[{"name": "sum"}], +) + +result1, node1 = run_get_node(PythonJob, inputs=inputs1) + +inputs2 = prepare_pythonjob_inputs( + multiply, + function_inputs={"x": 1, "y": 2}, + function_outputs=[{"name": "product"}], + parent_folder=result1["remote_folder"], +) + +result2, node2 = run_get_node(PythonJob, inputs=inputs2) + +print("result: ", result2) + +###################################################################### +# Upload files or folders to the remote computer +# -------------- +# The `upload_files` parameter allows users to upload files or folders to +# the remote computer. The files will be uploaded to the working directory of the remote computer. +# + +import os # noqa: E402 + +# create a temporary file "input.txt" in the current directory +with open("/tmp/input.txt", "w") as f: + f.write("2") + +# create a temporary folder "inputs_folder" in the current directory +# and add a file "another_input.txt" in the folder +os.makedirs("/tmp/inputs_folder", exist_ok=True) +with open("/tmp/inputs_folder/another_input.txt", "w") as f: + f.write("3") + + +def add(): + with open("input.txt", "r") as f: + a = int(f.read()) + with open("inputs_folder/another_input.txt", "r") as f: + b = int(f.read()) + return a + b + + +# ------------------------- Submit the calculation ------------------- +# we need use full path to the file +input_file = os.path.abspath("/tmp/input.txt") +input_folder = os.path.abspath("/tmp/inputs_folder") +inputs = prepare_pythonjob_inputs( + add, + upload_files={ + "input.txt": input_file, + "inputs_folder": input_folder, + }, +) +result, node = run_get_node(PythonJob, inputs=inputs) +print("result: ", result["result"]) + +###################################################################### +# Retrieve additional files from the remote computer +# -------------- +# Sometimes, one may want to retrieve additional files from the remote +# computer after the job has finished. For example, one may want to retrieve +# the output files generated by the `pw.x` calculation in Quantum ESPRESSO. +# +# One can use the `additional_retrieve_list` parameter to specify which files +# should be retrieved from the working directory and stored in the local +# repository after the job has finished +# + + +def add(x, y): + z = x + y + with open("result.txt", "w") as f: + f.write(str(z)) + return x + y + + +inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + metadata={ + "options": { + "additional_retrieve_list": ["result.txt"], + } + }, +) + +result, node = run_get_node(PythonJob, inputs=inputs) +print("retrieved files: ", result["retrieved"].list_object_names()) + +###################################################################### +# Namespace Output +# -------------- +# +# The `PythonJob` allows users to define namespace outputs. A namespace output +# is a dictionary with keys and values returned by a function. Each value in +# this dictionary will be serialized to AiiDA data, and the key-value pair +# will be stored in the database. +# Why Use Namespace Outputs? +# +# - **Dynamic and Flexible**: The keys and values in the namespace output are +# not fixed and can change based on the task's execution. 
+# - **Querying**: The data in the namespace output is stored as an AiiDA data +# node, allowing for easy querying and retrieval. +# - **Data Provenance**: When the data is used as input for subsequent tasks, +# the origin of data is tracked. +# +# For example: Consider a molecule adsorption calculation where the namespace +# output stores the surface slabs of the molecule adsorbed on different surface +# sites. The number of surface slabs can vary depending on the surface. These +# output surface slabs can be utilized as input to the next task to calculate the energy. + +from ase import Atoms # noqa: E402 +from ase.build import bulk # noqa: E402 + + +def generate_structures(structure: Atoms, factor_lst: list) -> dict: + """Scale the structure by the given factor_lst.""" + scaled_structures = {} + for i in range(len(factor_lst)): + atoms = structure.copy() + atoms.set_cell(atoms.cell * factor_lst[i], scale_atoms=True) + scaled_structures[f"s_{i}"] = atoms + return {"scaled_structures": scaled_structures} + + +inputs = prepare_pythonjob_inputs( + generate_structures, + function_inputs={"structure": bulk("Al"), "factor_lst": [0.95, 1.0, 1.05]}, + function_outputs=[{"name": "scaled_structures", "identifier": "namespace"}], +) + +result, node = run_get_node(PythonJob, inputs=inputs) +print("scaled_structures: ") +for key, value in result["scaled_structures"].items(): + print(key, value) + +###################################################################### +# Exit Code +# -------------- +# +# +# When the function returns a dictionary with an `exit_code` key, the system +# automatically parses and uses this code to indicate the task's status. In +# the case of an error, the non-zero `exit_code` value helps identify the specific problem. +# +# + + +def add(x, y): + sum = x + y + if (sum < 0).any(): + exit_code = {"status": 410, "message": "Some elements are negative"} + return {"sum": sum, "exit_code": exit_code} + return {"sum": sum} + + +inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": -21}, +) + +result, node = run_get_node(PythonJob, inputs=inputs) +print("exit_status:", node.exit_status) +print("exit_message:", node.exit_message) + + +###################################################################### +# Define your data serializer +# -------------- +# +# PythonJob search data serializer from the `aiida.data` entry point by the +# module name and class name (e.g., `ase.atoms.Atoms`). +# +# In order to let the PythonJob find the serializer, you must register the +# AiiDA data with the following format: +# +# .. code-block:: ini +# +# [project.entry-points."aiida.data"] +# abc.ase.atoms.Atoms = "abc.xyz:MyAtomsData" +# +# This will register a data serializer for `ase.atoms.Atoms` data. `abc` is +# the plugin name, the module name is `xyz`, and the AiiDA data class name is +# `AtomsData`. Learn how to create an AiiDA data class `here `_. +# +# *Avoid duplicate data serializer*: If you have multiple plugins that +# register the same data serializer, the PythonJob will raise an error. +# You can avoid this by selecting the plugin that you want to use in the configuration file. +# +# +# .. code-block:: json +# +# { +# "serializers": { +# "ase.atoms.Atoms": "abc.ase.atoms.Atoms" +# } +# } +# +# Save the configuration file as `pythonjob.json` in the aiida configuration +# directory (by default, `~/.aiida` directory). 
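+#
+# For example, a minimal sketch (assuming the default `~/.aiida` configuration
+# directory) that writes this mapping from Python:
+#
+# .. code-block:: python
+#
+#     import json
+#     from pathlib import Path
+#
+#     # the key is the module/class path of the data, the value is the
+#     # registered serializer entry point (names taken from the example above)
+#     config = {"serializers": {"ase.atoms.Atoms": "abc.ase.atoms.Atoms"}}
+#     config_path = Path.home() / ".aiida" / "pythonjob.json"
+#     config_path.write_text(json.dumps(config, indent=4))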
+ + +###################################################################### +# What's Next +# ----------- +# +-----------------------------------------+------------------------------------------------------+ +# | `Tutorials <../tutorial/index.rst>`__ | Real-world examples in computational materials | +# | | science and more. | +# | | | +# +-----------------------------------------+------------------------------------------------------+ +# +# diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..9eb3b97 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,8 @@ +sphinx_rtd_theme==1.2.2 +sphinx-gallery +nbsphinx==0.9.2 +ipython +aiida-core +ase +furo +matplotlib # for sphinx-gallery diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..226806d --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,161 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import shutil +import sys +from pathlib import Path + +sys.path.insert(0, os.path.abspath("../..")) + + +# -- Project information ----------------------------------------------------- + +project = "AiiDA PythonJob" +copyright = "2024, Xing Wang" +author = "Xing Wang" + +# version = "" + +# The master toctree document. +master_doc = "index" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx_rtd_theme", + "sphinx.ext.autodoc", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "nbsphinx", + "sphinx_gallery.gen_gallery", +] + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +gallery_src_relative_dir = "../gallery" # relative path of the gallery src wrt. 
sphinx src +sphinx_src_autogen_dirs = [ + "autogen", +] +# we mimik the structure in the sphinx src directory in the gallery src directory + +# path of python scripts that should be executed +gallery_src_dirs = [os.path.join(gallery_src_relative_dir, autogen_dir) for autogen_dir in sphinx_src_autogen_dirs] +sphinx_gallery_conf = { + "filename_pattern": "/*", + "examples_dirs": gallery_src_dirs, # in sphinx-gallery doc referred as gallery source + "gallery_dirs": sphinx_src_autogen_dirs, # path to where to gallery puts generated files +} + +exclude_patterns = [] +# ignore in the autogenerated ipynb files to surpress warning +exclude_patterns.extend( + [os.path.join(sphinx_src_autogen_dir, "*ipynb") for sphinx_src_autogen_dir in sphinx_src_autogen_dirs] +) + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'alabaste +html_theme = "furo" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] +html_css_files = [ + "css/theme.css", +] + +html_theme_options = { + "source_repository": "https://github.com/aiidateam/aiida-pythonjob/", + "source_branch": "main", + "source_directory": "docs/source", + "footer_icons": [ + { + "name": "GitHub", + "url": "https://github.com/aiidateam/aiida-pythonjob", + "html": """ + + + + """, + "class": "", + }, + ], + # "announcement": "Important announcement!", +} + +# pygments_style = "colorful" +# pygments_dark_style = "monokai" + + +# Function to copy HTML files +def copy_html_files(app, exception): + """ + Copy all .html files from source to build directory, maintaining the directory structure. 
+ """ + copy_print_info = "Copying HTML files to build directory" + print() + print(copy_print_info) + print(len(copy_print_info) * "=") + if exception is not None: # Only copy files if the build succeeded + print("Build failed, but we still try to copy the HTML files to the build directory") + try: + src_path = Path(app.builder.srcdir) + build_path = Path(app.builder.outdir) + + copy_print_info = f"Copying html files from sphinx src directory {src_path}" + print() + print(copy_print_info) + print(len(copy_print_info) * "-") + for html_file in src_path.rglob("*.html"): + relative_path = html_file.relative_to(src_path) + destination_file = build_path / relative_path + destination_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(html_file, destination_file) + print(f"Copy {html_file} to {destination_file}") + + gallery_src_path = Path(app.builder.srcdir / Path(gallery_src_relative_dir)) + + copy_print_info = f"Copying html files from gallery src directory {gallery_src_path} to build" + print() + print(copy_print_info) + print(len(copy_print_info) * "-") + for html_file in gallery_src_path.rglob("*.html"): + relative_path = html_file.relative_to(gallery_src_path) + destination_file = build_path / relative_path + destination_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(html_file, destination_file) + print(f"Copy {html_file} to {destination_file}") + except Exception as e: + print(f"Failed to copy HTML files: {e}") + + +def setup(app): + app.connect("build-finished", copy_html_files) diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..767d698 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,26 @@ + +AiiDA PythonJob +=========================================== + + +`PythonJob` allows users to run Python functions on a remote computer. It is designed to enable users from non-AiiDA communities to run their Python functions remotely and construct workflows with checkpoints, maintaining all data provenance. For instance, users can use ASE's calculator to run a DFT calculation on a remote computer directly. + +**Key Features** + +1. **Remote Execution**: Seamlessly run Python functions on a remote computer. +2. **User-Friendly**: Designed for users who are not familiar with AiiDA, simplifying the process of remote execution. +3. **Workflow Management**: Construct workflows using WorkGraph with checkpoints, ensuring that intermediate states and results are preserved. +4. **Data Provenance**: Maintain comprehensive data provenance, tracking the full history and transformations of data. + + +.. toctree:: + :maxdepth: 1 + :caption: Contents: + :hidden: + + installation + autogen/how_to + tutorial/index + + + diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000..64b8530 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,44 @@ +============ +Installation +============ + +.. _installation:requirements: + +Requirements +============ + +To work with ``aiida-pythonjob``, you should have: + +* installed ``aiida-core`` +* configured an AiiDA profile. + +Please refer to the `documentation `_ of ``aiida-core`` for detailed instructions. + + +.. _installation:installation: + +Installation +============ + + +The recommended method of installation is to use the Python package manager |pip|_: + +.. code-block:: console + + $ pip install aiida-pythonjob + +This will install the latest stable version that was released to PyPI. 
+ +To install the package from source, first clone the repository and then install using |pip|_: + +.. code-block:: console + + $ git clone https://github.com/aiidateam/aiida-pythonjob + $ cd aiida-pythonjob + $ pip install -e . + +The ``-e`` flag will install the package in editable mode, meaning that changes to the source code will be automatically picked up. + + +.. |pip| replace:: ``pip`` +.. _pip: https://pip.pypa.io/en/stable/ diff --git a/docs/source/pythonjob.ipynb b/docs/source/pythonjob.ipynb new file mode 100644 index 0000000..6407202 --- /dev/null +++ b/docs/source/pythonjob.ipynb @@ -0,0 +1,2503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "22d177dc-6cfb-4de2-9509-f1eb45e10cf2", + "metadata": {}, + "source": [ + "# PythonJob\n", + "## Introduction\n", + "\n", + "The `PythonJob` is a built-in task that allows users to run Python functions on a remote computer. It is designed to enable users from non-AiiDA communities to run their Python functions remotely and construct workflows with checkpoints, maintaining all data provenance. For instance, users can use ASE's calculator to run a DFT calculation on a remote computer directly. Users only need to write normal Python code, and the WorkGraph will handle the data transformation to AiiDA data.\n", + "\n", + "### Key Features\n", + "\n", + "1. **Remote Execution**: Seamlessly run Python functions on a remote computer.\n", + "2. **User-Friendly**: Designed for users who are not familiar with AiiDA, simplifying the process of remote execution.\n", + "3. **Workflow Management**: Construct workflows with checkpoints, ensuring that intermediate states and results are preserved.\n", + "4. **Data Provenance**: Maintain comprehensive data provenance, tracking the full history and transformations of data.\n", + "\n", + "\n", + "Load the AiiDA profile." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c6b83fb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Profile" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%load_ext aiida\n", + "from aiida import load_profile\n", + "load_profile()" + ] + }, + { + "cell_type": "markdown", + "id": "0f46d277", + "metadata": {}, + "source": [ + "## First Workflow\n", + "Suppose you need to calculate `(x + y) * z` in two steps: first, add `x` and `y`; second, multiply the result by `z`.\n", + "\n", + "There are three methods to declare a `PythonJob` task in a workflow:\n", + "\n", + "1. **Using the `task.pythonjob` decorator:** Apply this decorator directly when you define the function. This method is straightforward and integrates the task declaration seamlessly with function definition.\n", + "\n", + "2. **Converting an existing function with `task.pythonjob`:** If the function is already defined, you can convert it into a `PythonJob` task by wrapping it with the `task.pythonjob` decorator. This approach is useful when adapting pre-existing code into a task-based workflow.\n", + "\n", + "3. **Specifying `PythonJob` during task addition to the WorkGraph:** When adding a task to the WorkGraph, you can explicitly identify it as a `PythonJob`. 
This method offers flexibility, allowing you to dynamically assign the task type based on the workflow design requirements.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9d9e24f8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "\n", + "# decorator to define a pythonjob\n", + "@task.pythonjob()\n", + "def add(x, y):\n", + " return x + y\n", + "\n", + "\n", + "# here is a normal python function\n", + "def multiply(x, y):\n", + " return x*y\n", + "\n", + "wg = WorkGraph(\"first_workflow\")\n", + "wg.add_task(add, name=\"add\")\n", + "# we can also use a normal python function directly, but provide the \"PythonJob\" as the first argument\n", + "wg.add_task(\"PythonJob\", function=multiply, name=\"multiply\", x=wg.tasks[\"add\"].outputs[0])\n", + "\n", + "# visualize the workgraph\n", + "wg.to_html()\n", + "# visualize the workgraph in jupyter-notebook\n", + "# wg" + ] + }, + { + "cell_type": "markdown", + "id": "9bd7aa49", + "metadata": {}, + "source": [ + "### Prepare the inputs and submit the workgraph\n", + "\n", + "\n", + "**Code**: We can set the `computer` to the remote computer where we want to run the job. This will create a code `python3@computer` if it does not already exist. You can also set the `code` directly if you have already created the code.\n", + "\n", + "**Data**: Users are recommended to use normal Python data as input. The workgraph will handle the transfer and serialization of data to AiiDA data. When serializing to AiiDA data, the workgraph will first search for the corresponding AiiDA data entry point based on the module name and class name (e.g., `ase.atoms.Atoms`). If the corresponding entry point exists, it will be used to serialize the value. If not found, `GeneralData` (pickle) will be used to serialize the value into binary data.\n", + "\n", + "**Python Version**: Since pickle is used to store and load data, the Python version on the remote computer should match the one used on the localhost. You can use conda to create a virtual environment with the same Python version. Activate the environment before running the script.\n", + "\n", + "For real applications, you can pass metadata to the scheduler to activate the conda environment:\n", + "\n", + "```python\n", + "metadata = {\n", + " \"options\": {\n", + " 'custom_scheduler_commands': 'module load anaconda\\nconda activate py3.11\\n',\n", + " }\n", + "}\n", + "```\n", + "\n", + "#### Create a conda environment on the remote computer\n", + "One can use the `create_conda_env` function to create a conda environment on the remote computer. The function will create a conda environment with the specified packages and modules. 
The function will update the packages if the environment already exists.\n", + "\n", + "```python\n", + "from aiida_workgraph.utils import create_conda_env\n", + "# create a conda environment on remote computer\n", + "create_conda_env(\"merlin6\", \"test_pythonjob\", modules=[\"anaconda\"],\n", + " pip=[\"numpy\", \"matplotlib\"],\n", + " conda={\"channels\": [\"conda-forge\"],\n", + " \"dependencies\": [\"qe\"]},\n", + " )\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "02464256", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151133\n", + "\n", + "Result of multiply is uuid: ffd35a52-a7c3-4675-adba-a2108cc242cd (pk: 151154) value: 20 \n", + "\n", + "\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N151133\n", + "\n", + "WorkGraph<first_workflow> (151133)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151140\n", + "\n", + "PythonJob<add> (151140)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151133->N151140\n", + "\n", + "\n", + "CALL_CALC\n", + "add\n", + "\n", + "\n", + "\n", + "N151150\n", + "\n", + "PythonJob<multiply> (151150)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151133->N151150\n", + "\n", + "\n", + "CALL_CALC\n", + "multiply\n", + "\n", + "\n", + "\n", + "N151155\n", + "\n", + "Int (151155)\n", + "\n", + "\n", + "\n", + "N151133->N151155\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151142\n", + "\n", + "RemoteData (151142)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151140->N151142\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151143\n", + "\n", + "FolderData (151143)\n", + "\n", + "\n", + "\n", + "N151140->N151143\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151144\n", + "\n", + "Int (151144)\n", + "\n", + "\n", + "\n", + "N151140->N151144\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151144->N151150\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__x\n", + "\n", + "\n", + "\n", + "N151152\n", + "\n", + "RemoteData (151152)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151150->N151152\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151153\n", + "\n", + "FolderData (151153)\n", + "\n", + "\n", + "\n", + "N151150->N151153\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151154\n", + "\n", + "Int (151154)\n", + "\n", + "\n", + "\n", + "N151150->N151154\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph.utils import generate_node_graph\n", + "\n", + "#------------------------- Submit the calculation -------------------\n", + "# For real applications, one can pass metadata to the scheduler to activate the conda environment\n", + "metadata = {\n", + " \"options\": {\n", + " # 'custom_scheduler_commands' : 'module load anaconda\\nconda activate py3.11\\n',\n", + " 'custom_scheduler_commands' : '',\n", + " }\n", + "}\n", + "\n", + "wg.submit(inputs = {\"add\": {\"x\": 2, \"y\": 3,\n", + " \"computer\": \"localhost\",\n", + " \"metadata\": metadata},\n", + " \"multiply\": {\"y\": 4,\n", + " \"computer\": 
\"localhost\",\n", + " \"metadata\": metadata}},\n", + " wait=True)\n", + "#------------------------- Print the output -------------------------\n", + "print(\"\\nResult of multiply is {} \\n\\n\".format(wg.tasks[\"multiply\"].outputs['result'].value))\n", + "#------------------------- Generate node graph -------------------\n", + "generate_node_graph(wg.pk)" + ] + }, + { + "cell_type": "markdown", + "id": "66b34ef1", + "metadata": {}, + "source": [ + "## Use parent folder\n", + "The parent_folder parameter allows a task to access the output files of a parent task. This feature is particularly useful when you want to reuse data generated by a previous computation in subsequent computations. In the following example, the multiply task uses the `result.txt` file created by the add task.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "aa47c860", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "\n", + "def add(x, y):\n", + " z = x + y\n", + " with open(\"result.txt\", \"w\") as f:\n", + " f.write(str(z))\n", + "\n", + "def multiply(x, y):\n", + " with open(\"parent_folder/result.txt\", \"r\") as f:\n", + " z = int(f.read())\n", + " return x*y + z\n", + "\n", + "wg = WorkGraph(\"PythonJob_parent_folder\")\n", + "wg.add_task(\"PythonJob\", function=add, name=\"add\")\n", + "wg.add_task(\"PythonJob\", function=multiply, name=\"multiply\",\n", + " parent_folder=wg.tasks[\"add\"].outputs[\"remote_folder\"],\n", + " )\n", + "\n", + "wg.to_html()" + ] + }, + { + "cell_type": "markdown", + "id": "7c4650ac", + "metadata": {}, + "source": [ + "Submit the workgraph and print the result." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6367b6eb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151156\n", + "\n", + "Result of multiply is uuid: 0849fb0a-d461-4a08-8e73-89fb4f848d59 (pk: 151178) value: 17 \n", + "\n", + "\n" + ] + } + ], + "source": [ + "wg.submit(inputs = {\"add\": {\"x\": 2, \"y\": 3, \"computer\": \"localhost\"},\n", + " \"multiply\": {\"x\": 3, \"y\": 4, \"computer\": \"localhost\"}},\n", + " wait=True)\n", + "print(\"\\nResult of multiply is {} \\n\\n\".format(wg.tasks[\"multiply\"].outputs['result'].value))" + ] + }, + { + "cell_type": "markdown", + "id": "1bc31c81", + "metadata": {}, + "source": [ + "## Upload files or folders to the remote computer\n", + "The `upload_files` parameter allows users to upload files or folders to the remote computer. 
The files will be uploaded to the working directory of the remote computer.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ba00e2ce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151180\n", + "\n", + "Result of add is uuid: 14393adc-ddfd-4b2a-8754-d7d785a8c3e0 (pk: 151191) value: 5 \n", + "\n", + "\n" + ] + } + ], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "\n", + "# create a temporary file \"input.txt\" in the current directory\n", + "with open(\"input.txt\", \"w\") as f:\n", + " f.write(\"2\")\n", + "\n", + "# create a temporary folder \"inputs_folder\" in the current directory\n", + "# and add a file \"another_input.txt\" in the folder\n", + "import os\n", + "os.makedirs(\"inputs_folder\", exist_ok=True)\n", + "with open(\"inputs_folder/another_input.txt\", \"w\") as f:\n", + " f.write(\"3\")\n", + "\n", + "def add():\n", + " with open(\"input.txt\", \"r\") as f:\n", + " a = int(f.read())\n", + " with open(\"inputs_folder/another_input.txt\", \"r\") as f:\n", + " b = int(f.read())\n", + " return a + b\n", + "\n", + "\n", + "wg = WorkGraph(\"PythonJob_upload_files\")\n", + "wg.add_task(\"PythonJob\", function=add, name=\"add\")\n", + "\n", + "#------------------------- Submit the calculation -------------------\n", + "# we need use full path to the file\n", + "input_file = os.path.abspath(\"input.txt\")\n", + "input_folder = os.path.abspath(\"inputs_folder\")\n", + "\n", + "wg.submit(inputs = {\"add\": {\n", + " \"computer\": \"localhost\",\n", + " \"upload_files\": {\"input.txt\": input_file,\n", + " \"inputs_folder\": input_folder,\n", + " },\n", + " },\n", + " },\n", + " wait=True)\n", + "print(\"\\nResult of add is {} \\n\\n\".format(wg.tasks[\"add\"].outputs['result'].value))" + ] + }, + { + "cell_type": "markdown", + "id": "2174a45e", + "metadata": {}, + "source": [ + "## First Real-world Workflow: atomization energy of molecule\n", + "\n", + "The atomization energy, $\\Delta E$, of a molecule can be expressed as:\n", + "\n", + "$$\n", + "\\Delta E = n_{\\text{atom}} \\times E_{\\text{atom}} - E_{\\text{molecule}}\n", + "$$\n", + "\n", + "Where:\n", + "- $\\Delta E$ is the atomization energy of the molecule.\n", + "- $n_{\\text{atom}}$ is the number of atoms.\n", + "- $E_{\\text{atom}}$ is the energy of an isolated atom.\n", + "- $E_{\\text{molecule}}$ is the energy of the molecule.\n", + "\n", + "\n", + "### Define a task to calculate the energy of the atoms using EMT potential" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "713da634", + "metadata": {}, + "outputs": [], + "source": [ + "from aiida_workgraph import task, WorkGraph\n", + "\n", + "def emt(atoms):\n", + " from ase.calculators.emt import EMT\n", + " atoms.calc = EMT()\n", + " energy = atoms.get_potential_energy()\n", + " return energy\n", + "\n", + "\n", + "def atomization_energy(mol, energy_molecule, energy_atom):\n", + " energy = energy_atom*len(mol) - energy_molecule\n", + " return energy\n" + ] + }, + { + "cell_type": "markdown", + "id": "00a7531e", + "metadata": {}, + "source": [ + "### Define a workgraph\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a81fa9e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wg = WorkGraph(\"atomization_energy\")\n", 
+ "pw_atom = wg.add_task(\"PythonJob\", function=emt, name=\"emt_atom\")\n", + "pw_mol = wg.add_task(\"PythonJob\", function=emt, name=\"emt_mol\")\n", + "wg.add_task(\"PythonJob\", function=atomization_energy, name=\"atomization_energy\",\n", + " energy_atom=pw_atom.outputs[\"result\"],\n", + " energy_molecule=pw_mol.outputs[\"result\"])\n", + "wg.to_html()" + ] + }, + { + "cell_type": "markdown", + "id": "b686f3ba", + "metadata": {}, + "source": [ + "### Prepare the inputs and submit the workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "11e3bca1-dda6-44e9-9585-54feeda7e7db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151193\n", + "Energy of a N atom: 5.100\n", + "Energy of an un-relaxed N2 molecule: 0.549\n", + "Atomization energy: 9.651 eV\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N151193\n", + "\n", + "WorkGraph<atomization_energy> (151193)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151199\n", + "\n", + "PythonJob<emt_mol> (151199)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151193->N151199\n", + "\n", + "\n", + "CALL_CALC\n", + "emt_mol\n", + "\n", + "\n", + "\n", + "N151205\n", + "\n", + "PythonJob<emt_atom> (151205)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151193->N151205\n", + "\n", + "\n", + "CALL_CALC\n", + "emt_atom\n", + "\n", + "\n", + "\n", + "N151219\n", + "\n", + "PythonJob<atomization_energy> (151219)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151193->N151219\n", + "\n", + "\n", + "CALL_CALC\n", + "atomization_energy\n", + "\n", + "\n", + "\n", + "N151224\n", + "\n", + "Int (151224)\n", + "\n", + "\n", + "\n", + "N151193->N151224\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151208\n", + "\n", + "RemoteData (151208)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151199->N151208\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151210\n", + "\n", + "FolderData (151210)\n", + "\n", + "\n", + "\n", + "N151199->N151210\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151212\n", + "\n", + "GeneralData (151212)\n", + "\n", + "\n", + "\n", + "N151199->N151212\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151209\n", + "\n", + "RemoteData (151209)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151205->N151209\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151211\n", + "\n", + "FolderData (151211)\n", + "\n", + "\n", + "\n", + "N151205->N151211\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151213\n", + "\n", + "GeneralData (151213)\n", + "\n", + "\n", + "\n", + "N151205->N151213\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151212->N151219\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__energy_molecule\n", + "\n", + "\n", + "\n", + "N151213->N151219\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__energy_atom\n", + "\n", + "\n", + "\n", + "N151221\n", + "\n", + "RemoteData (151221)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151219->N151221\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151222\n", + "\n", + "FolderData (151222)\n", + "\n", + "\n", + "\n", + 
"N151219->N151222\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151223\n", + "\n", + "GeneralData (151223)\n", + "\n", + "\n", + "\n", + "N151219->N151223\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ase.build import molecule\n", + "from ase import Atoms\n", + "\n", + "load_profile()\n", + "\n", + "# create input structure\n", + "n_atom = Atoms(\"N\", pbc=True)\n", + "n_atom.center(vacuum=5.0)\n", + "n2_molecule = molecule(\"N2\", pbc=True)\n", + "n2_molecule.center(vacuum=5.0)\n", + "\n", + "\n", + "#------------------------- Set the inputs -------------------------\n", + "wg.tasks[\"emt_atom\"].set({\"atoms\": n_atom, \"computer\": \"localhost\"})\n", + "wg.tasks[\"emt_mol\"].set({\"atoms\": n2_molecule, \"computer\": \"localhost\"})\n", + "wg.tasks[\"atomization_energy\"].set({\"mol\": n2_molecule, \"computer\": \"localhost\"})\n", + "#------------------------- Submit the calculation -------------------\n", + "wg.submit(wait=True, timeout=200)\n", + "#------------------------- Print the output -------------------------\n", + "print('Energy of a N atom: {:0.3f}'.format(wg.tasks['emt_atom'].outputs[\"result\"].value.value))\n", + "print('Energy of an un-relaxed N2 molecule: {:0.3f}'.format(wg.tasks['emt_mol'].outputs[\"result\"].value.value))\n", + "print('Atomization energy: {:0.3f} eV'.format(wg.tasks['atomization_energy'].outputs[\"result\"].value.value))\n", + "#------------------------- Generate node graph -------------------\n", + "generate_node_graph(wg.pk)\n" + ] + }, + { + "cell_type": "markdown", + "id": "d25beb02-ee82-4a27-ae48-edc5c147904c", + "metadata": {}, + "source": [ + "## Call shell commands in the PythonJob task\n", + "\n", + "We want to calculate `(x+y)*z` in two steps using `echo` and `bc` commands.\n", + "\n", + "Step 1: Calculate (x+y) and store it as result\n", + "```\n", + "result=$(echo \"$x + $y\" | bc)\n", + "```\n", + "\n", + "Step 2: Multiply result by z and store the final result\n", + "```\n", + "result=$(echo \"$result * $z\" | bc)\n", + "```\n", + "\n", + "If one wanted to run this workflow in AiiDA, one would have to write plugins for `echo` and `bc` commands, and a WorkChain to handle the workflow. 
With aiida-workgraph and the `PythonJob` task, this can be run through AiiDA with the following workgraph:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d8471a01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph import task, WorkGraph\n", + "\n", + "def add(x, y):\n", + " import os\n", + " os.system(\"echo '{} + {}' | bc > result.txt\".format(x, y))\n", + " with open(\"result.txt\", \"r\") as f:\n", + " return float(f.read())\n", + "\n", + "\n", + "def multiply(x, y):\n", + " import os\n", + " os.system(\"echo '{} * {}' | bc > result.txt\".format(x, y))\n", + " with open(\"result.txt\", \"r\") as f:\n", + " return float(f.read())\n", + "\n", + "\n", + "wg = WorkGraph(\"PythonJob_shell_command\")\n", + "wg.add_task(\"PythonJob\", function=add, name=\"add\")\n", + "wg.add_task(\"PythonJob\", function=multiply, name=\"multiply\", x=wg.tasks[\"add\"].outputs[0])\n", + "\n", + "# visualize the workgraph\n", + "wg.to_html()\n" + ] + }, + { + "cell_type": "markdown", + "id": "9cb86fa4", + "metadata": {}, + "source": [ + "submit the workgraph and print the result:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "df7976d2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151225\n", + "\n", + "Result of multiply is uuid: deab3241-85b3-47d6-9551-9d8d14dc255d (pk: 151246) value: 20.0 \n", + "\n", + "\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N151225\n", + "\n", + "WorkGraph<PythonJob_shell_command> (151225)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151232\n", + "\n", + "PythonJob<add> (151232)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151225->N151232\n", + "\n", + "\n", + "CALL_CALC\n", + "add\n", + "\n", + "\n", + "\n", + "N151242\n", + "\n", + "PythonJob<multiply> (151242)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151225->N151242\n", + "\n", + "\n", + "CALL_CALC\n", + "multiply\n", + "\n", + "\n", + "\n", + "N151247\n", + "\n", + "Int (151247)\n", + "\n", + "\n", + "\n", + "N151225->N151247\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151234\n", + "\n", + "RemoteData (151234)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151232->N151234\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151235\n", + "\n", + "FolderData (151235)\n", + "\n", + "\n", + "\n", + "N151232->N151235\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151236\n", + "\n", + "Float (151236)\n", + "\n", + "\n", + "\n", + "N151232->N151236\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151236->N151242\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__x\n", + "\n", + "\n", + "\n", + "N151244\n", + "\n", + "RemoteData (151244)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151242->N151244\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151245\n", + "\n", + "FolderData (151245)\n", + "\n", + "\n", + "\n", + "N151242->N151245\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151246\n", + "\n", + "Float (151246)\n", + "\n", + 
"\n", + "\n", + "N151242->N151246\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "wg.submit(inputs = {\"add\": {\"x\": 2, \"y\": 3, \"computer\": \"localhost\"},\n", + " \"multiply\": {\"y\": 4, \"computer\": \"localhost\"}},\n", + " wait=True)\n", + "#------------------------- Print the output -------------------------\n", + "print(\"\\nResult of multiply is {} \\n\\n\".format(wg.tasks[\"multiply\"].outputs['result'].value))\n", + "#------------------------- Generate node graph -------------------\n", + "generate_node_graph(wg.pk)" + ] + }, + { + "cell_type": "markdown", + "id": "824a056d", + "metadata": {}, + "source": [ + "## Note\n", + "One can not run a `graph_builder` task in a `PythonJob` task. The `graph_builder` task is used to build the workgraph, and it should be run in the localhost by the daemon.\n", + "\n", + "However, one can run a `PythonJob` task in a `graph_builder` task. The `PythonJob` task will be executed on the remote computer.\n", + "\n", + "The following code will raise an error:\n", + "\n", + "```python\n", + "from aiida_workgraph import task, WorkGraph\n", + "\n", + "\n", + "@task.graph_builder()\n", + "def add_multiply():\n", + " wg = WorkGraph()\n", + " return wg\n", + "\n", + "wg = WorkGraph()\n", + "wg.add_task(\"PythonJob\", function=add_multiply, name=\"add_multiply\")\n", + "\n", + "---------------------------------------------------------------------------\n", + "ValueError Traceback (most recent call last)\n", + "/tmp/ipykernel_3498848/1351840398.py in ()\n", + " 8 \n", + " 9 wg = WorkGraph()\n", + "---> 10 wg.add_task(\"PythonJob\", function=add_multiply, name=\"add_multiply\")\n", + "\n", + "~/repos/superstar54/aiida-workgraph/aiida_workgraph/collection.py in new(self, identifier, name, uuid, run_remotely, **kwargs)\n", + " 35 return super().new(identifier, name, uuid, **kwargs)\n", + " 36 if isinstance(identifier, str) and identifier.upper() == \"PYTHONJOB\":\n", + "---> 37 identifier, _ = build_pythonjob_task(kwargs.pop(\"function\"))\n", + " 38 return super().new(identifier, name, uuid, **kwargs)\n", + " 39 if isinstance(identifier, str) and identifier.upper() == \"SHELLJOB\":\n", + "\n", + "~/repos/superstar54/aiida-workgraph/aiida_workgraph/decorator.py in build_pythonjob_task(func)\n", + " 262 \n", + " 263 if func.node.node_type.upper() == \"GRAPH_BUILDER\":\n", + "--> 264 raise ValueError(\n", + " 265 \"GraphBuilder task cannot be run remotely. Please remove 'PythonJob'.\"\n", + " 266 )\n", + "\n", + "ValueError: GraphBuilder task cannot be run remotely. 
Please remove 'PythonJob'.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "c6eec323", + "metadata": {}, + "source": [ + "However, the following code will work:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3b226eb7", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d30b54834fbc48c0908ae07d06594818", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "NodeGraphWidget(settings={'minmap': False}, style={'width': '80%', 'height': '600px'}, value={'name': 'add_mul…" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph import task, WorkGraph\n", + "\n", + "\n", + "@task.graph_builder()\n", + "def add_multiply():\n", + " wg = WorkGraph()\n", + " wg.add_task(\"PythonJob\", function=add, name=\"add\")\n", + " return wg\n", + "\n", + "wg = WorkGraph()\n", + "wg.add_task(add_multiply, name=\"add_multiply\")" + ] + }, + { + "cell_type": "markdown", + "id": "68f96103", + "metadata": {}, + "source": [ + "### Using `parent_folder_name` for Data Continuity\n", + "\n", + "AiiDA runs each job in a separate folder. If one calculation requires data from previous calculations to be accessible in the current job's working directory. This has been managed with the `parent_folder` input, which specifies a source for copying necessary data. The new `parent_folder_name` input streamlines this process by allowing users to define a subfolder within the working directory to organize these files effectively.\n", + "\n", + "#### Example Usage: NSCF Calculation\n", + "In the context of an NSCF calculation, where data dependency exists on outputs from a SCF calculation, the workflow can be configured as follows:\n", + "\n", + "```python\n", + "nscf_task = wg.add_task(\"PythonJob\",\n", + " function=pw_calculator,\n", + " name=\"nscf\",\n", + " parent_folder=scf_task.outputs[\"remote_folder\"],\n", + " parent_output_folder=\"out\",\n", + " parent_folder_name=\"out\",\n", + ")\n", + "```\n", + "This setup will copy all content of the `out` folder from the SCF calculation's remote folder into an `out` folder within the working directory of the NSCF job.\n", + "\n", + "### Handling Multiple Data Sources with `copy_files`\n", + "The traditional `parent_folder` method is limited when calculations require inputs from multiple remote directories. For instance, Bader charge analysis with Quantum ESPRESSO may need both valence and all-electron density data from different calculations.\n", + "\n", + "The new `copy_files` input allows for flexible linkage to multiple remote folders. 
It facilitates copying necessary files from diverse sources into a single job's directory under dynamically generated subfolder names based on taskand socket names.\n", + "\n", + "#### Example Usage: Bader Charge Analysis\n", + "For a Bader analysis requiring different charge density files:\n", + "\n", + "```python\n", + "bader_task = wg.add_task(\"PythonJob\",\n", + " function=bader_calculator,\n", + " name=\"bader\",\n", + " command=bader_command,\n", + " charge_density_folder=\"pp_valence_remote_folder\",\n", + " reference_charge_density_folder=\"pp_all_remote_folder\",\n", + ")\n", + "wg.add_link(pp_valence.outputs[\"remote_folder\"], bader_task.inputs[\"copy_files\"])\n", + "wg.add_link(pp_all.outputs[\"remote_folder\"], bader_task.inputs[\"copy_files\"])\n", + "```\n", + "\n", + "The `bader_calculator` function using specified charge density data:\n", + "\n", + "```python\n", + "def bader_calculator(\n", + " command: str = \"pw.x\",\n", + " charge_density_folder: str = \"./\",\n", + " charge_density_filename: str = \"charge_density.cube\",\n", + " reference_charge_density_folder: str = \"./\",\n", + " reference_charge_density_filename: str = \"charge_density.cube\",\n", + "):\n", + " \"\"\"Run Bader charge analysis.\"\"\"\n", + " command_str = f\"{command} {charge_density_folder}/{charge_density_filename}\"\n", + " if reference_charge_density_filename:\n", + " command_str += f\" -ref {reference_charge_density_folder}/{reference_charge_density_filename}\"\n", + " os.system(command_str)\n", + "\n", + " with open(\"ACF.dat\", \"r\") as f:\n", + " lines = f.readlines()\n", + " charges = [float(line.split()[4]) for line in lines[2:-4]]\n", + "\n", + " return charges\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a04617ca", + "metadata": {}, + "source": [ + "\n", + "## Namespace Output\n", + "\n", + "The `PythonJob` allows users to define namespace outputs. A namespace output is a dictionary with keys and values returned by a function. Each value in this dictionary will be serialized to AiiDA data, and the key-value pair will be stored in the database.\n", + "\n", + "### Why Use Namespace Outputs?\n", + "\n", + "- **Dynamic and Flexible**: The keys and values in the namespace output are not fixed and can change based on the task's execution.\n", + "- **Querying**: The data in the namespace output is stored as an AiiDA data node, allowing for easy querying and retrieval.\n", + "- **Data Provenance**: When the data is used as input for subsequent tasks, the origin of data is tracked.\n", + "\n", + "### Example Use Case\n", + "\n", + "Consider a molecule adsorption calculation where the namespace output stores the surface slabs of the molecule adsorbed on different surface sites. The number of surface slabs can vary depending on the surface. These output surface slabs can be utilized as input to the next task to calculate the energy.\n", + "\n", + "### Defining Namespace Outputs\n", + "\n", + "To declare a namespace output, set the `identifier` to `workgraph.namespace` in the `outputs` parameter of the `@task` decorator. For example:\n", + "\n", + "```python\n", + "@task(outputs=[{\"name\": \"structures\", \"identifier\": \"workgraph.namespace\"}])\n", + "def generate_surface_slabs():\n", + " # Function logic to generate surface slabs\n", + " return {\"slab1\": slab_data1, \"slab2\": slab_data2}\n", + "```\n", + "\n", + "\n", + "One can also define nested namespace outputs by specifying the `identifier` as `workgraph.namespace` for sub-dictionaries within the namespace output. 
For example, here we define `add_multiply.add` as a nested namespace output:\n", + "\n", + "```python\n", + "@task(\n", + " outputs=[{\"name\": \"add_multiply\", \"identifier\": \"workgraph.namespace\"},\n", + " {\"name\": \"add_multiply.add\", \"identifier\": \"workgraph.namespace\"},\n", + " {\"name\": \"minus\"},\n", + " ]\n", + ")\n", + "def myfunc(x, y):\n", + " add = {\"order1\": x + y, \"order2\": x * x + y * y}\n", + " return {\n", + " \"add_multiply\": {\"add\": add, \"multiply\": x * y},\n", + " \"minus\": x - y,\n", + " }\n", + "```\n", + "\n", + "\n", + "### Using Namespace Outputs as Inputs\n", + "\n", + "A namespace output can be passed directly as an input to another task. It will be passed as a dictionary to the task, preserving the structure and allowing for flexible data handling.\n", + "\n", + "If you want to pass the value of a key in the namespace output as an input to another task, you need to define a output for that key. For example, to pass the value of `add_multiply.add` as an input to another task, you need to define an output for `add_multiply.add`:\n", + "\n", + "```python\n", + "@task(\n", + " outputs=[\n", + " {\"identifier\": \"workgraph.namespace\", \"name\": \"add_multiply\"},\n", + " {\"name\": \"add_multiply.add\"},\n", + " {\"name\": \"add_multiply.multiply\"},\n", + " {\"name\": \"minus\"},\n", + " ]\n", + ")\n", + "def myfunc(x, y):\n", + " return {\n", + " \"add_multiply\": {\"add\": x + y, \"multiply\": x * y},\n", + " \"minus\": x - y,\n", + " }\n", + "```\n", + "\n", + "then you can pass the value of `add_multiply.add` as an input to another task:\n", + "\n", + "```python\n", + "wg.add_task(\"PythonJob\",\n", + " function=myfunc3,\n", + " name=\"myfunc3\",\n", + " x=wg.tasks[\"myfunc\"].outputs[\"add_multiply.add\"],\n", + " )\n", + "```\n", + "\n", + "\n", + "## Second Real-world Workflow: Equation of state (EOS) WorkGraph\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "dd00841a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "from ase.build import bulk\n", + "from ase import Atoms\n", + "from aiida import load_profile\n", + "\n", + "load_profile()\n", + "\n", + "@task(outputs=[{\"name\": \"scaled_atoms\", \"identifier\": \"workgraph.namespace\"},\n", + " {\"name\": \"volumes\"}]\n", + ")\n", + "def generate_scaled_atoms(atoms: Atoms, scales: list) -> dict:\n", + " \"\"\"Scale the structure by the given scales.\"\"\"\n", + " volumes = {}\n", + " scaled_atoms = {}\n", + " for i in range(len(scales)):\n", + " atoms1 = atoms.copy()\n", + " atoms1.set_cell(atoms.cell * scales[i], scale_atoms=True)\n", + " scaled_atoms[f\"s_{i}\"] = atoms1\n", + " volumes[f\"s_{i}\"] = atoms1.get_volume()\n", + " return {\"scaled_atoms\": scaled_atoms, \"volumes\": volumes}\n", + "\n", + "@task()\n", + "def emt(atoms):\n", + " from ase.calculators.emt import EMT\n", + " atoms.calc = EMT()\n", + " energy = atoms.get_potential_energy()\n", + " return {\"energy\": energy}\n", + "\n", + "# Output result from context to the output socket\n", + "@task.graph_builder(outputs=[{\"name\": \"results\", \"from\": \"context.results\"}])\n", + "def calculate_enegies(scaled_atoms):\n", + " \"\"\"Run the scf calculation for each structure.\"\"\"\n", + " from aiida_workgraph import WorkGraph\n", + " wg = WorkGraph()\n", + " 
for key, atoms in scaled_atoms.items():\n", + " emt1 = wg.add_task(\"PythonJob\", function=emt, name=f\"emt1_{key}\", atoms=atoms)\n", + " emt1.set({\"computer\": \"localhost\"})\n", + " # save the output parameters to the context\n", + " emt1.set_context({\"result\": f\"results.{key}\"})\n", + " return wg\n", + "\n", + "\n", + "@task()\n", + "def fit_eos(volumes: dict, emt_results: dict) -> dict:\n", + " \"\"\"Fit the EOS of the data.\"\"\"\n", + " from ase.eos import EquationOfState\n", + " from ase.units import kJ\n", + "\n", + " volumes_list = []\n", + " energies = []\n", + " for key, data in emt_results.items():\n", + " energy = data[\"energy\"]\n", + " energies.append(energy)\n", + " volumes_list.append(volumes[key])\n", + " #\n", + " eos = EquationOfState(volumes_list, energies)\n", + " v0, e0, B = eos.fit()\n", + " # convert B to GPa\n", + " B = B / kJ * 1.0e24\n", + " eos = {\"energy unit\": \"eV\", \"v0\": v0, \"e0\": e0, \"B\": B}\n", + " return eos\n", + "\n", + "atoms = bulk(\"Au\", cubic=True)\n", + "\n", + "wg = WorkGraph(\"pythonjob_eos_emt\")\n", + "scale_atoms_task = wg.add_task(\"PythonJob\",\n", + " function=generate_scaled_atoms,\n", + " name=\"scale_atoms\",\n", + " atoms=atoms,\n", + " )\n", + " # -------- calculate_enegies -----------\n", + "calculate_enegies_task = wg.add_task(calculate_enegies,\n", + " name=\"calculate_enegies\",\n", + " scaled_atoms=scale_atoms_task.outputs[\"scaled_atoms\"],\n", + " )\n", + " # -------- fit_eos -----------\n", + "wg.add_task(\"PythonJob\",\n", + " function=fit_eos,\n", + " name=\"fit_eos\",\n", + " volumes=scale_atoms_task.outputs[\"volumes\"],\n", + " emt_results=calculate_enegies_task.outputs[\"results\"],\n", + " )\n", + "wg.to_html()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3d8072ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151263\n", + "The fitted EOS parameters are:\n" + ] + }, + { + "data": { + "text/plain": [ + "{'B': 167.61300824791,\n", + " 'e0': 0.006458727465855,\n", + " 'v0': 67.197735262521,\n", + " 'energy unit': 'eV'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "wg.submit(\n", + " inputs={\"scale_atoms\": {\"atoms\": atoms,\n", + " \"scales\": [0.95, 1.0, 1.05],\n", + " \"computer\": \"localhost\"},\n", + " \"fit_eos\": {\"computer\": \"localhost\"}},\n", + " wait=True,\n", + " )\n", + "\n", + "print(\"The fitted EOS parameters are:\")\n", + "wg.tasks[\"fit_eos\"].outputs[\"result\"].value.value\n" + ] + }, + { + "cell_type": "markdown", + "id": "8321bb88", + "metadata": {}, + "source": [ + "Generate the node graph and check the data provenance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "1f802430", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N151263\n", + "\n", + "WorkGraph<pythonjob_eos_emt> (151263)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151270\n", + "\n", + "PythonJob<scale_atoms> (151270)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151263->N151270\n", + "\n", + "\n", + "CALL_CALC\n", + "scale_atoms\n", + "\n", + "\n", + "\n", + "N151278\n", + "\n", + "WorkGraph<calculate_enegies> (151278)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151263->N151278\n", + "\n", + "\n", + "CALL_WORK\n", + "calculate_enegies\n", + "\n", + "\n", + "\n", + "N151311\n", + "\n", + "PythonJob<fit_eos> (151311)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151263->N151311\n", + "\n", + "\n", + "CALL_CALC\n", + "fit_eos\n", + "\n", + "\n", + "\n", + "N151316\n", + "\n", + "Int (151316)\n", + "\n", + "\n", + "\n", + "N151263->N151316\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151272\n", + "\n", + "RemoteData (151272)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151270->N151272\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151273\n", + "\n", + "FolderData (151273)\n", + "\n", + "\n", + "\n", + "N151270->N151273\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151274\n", + "\n", + "AtomsData (151274)\n", + "\n", + "\n", + "\n", + "N151270->N151274\n", + "\n", + "\n", + "CREATE\n", + "scaled_atoms__s_0\n", + "\n", + "\n", + "\n", + "N151275\n", + "\n", + "AtomsData (151275)\n", + "\n", + "\n", + "\n", + "N151270->N151275\n", + "\n", + "\n", + "CREATE\n", + "scaled_atoms__s_1\n", + "\n", + "\n", + "\n", + "N151276\n", + "\n", + "AtomsData (151276)\n", + "\n", + "\n", + "\n", + "N151270->N151276\n", + "\n", + "\n", + "CREATE\n", + "scaled_atoms__s_2\n", + "\n", + "\n", + "\n", + "N151277\n", + "\n", + "Dict (151277)\n", + "\n", + "\n", + "\n", + "N151270->N151277\n", + "\n", + "\n", + "CREATE\n", + "volumes\n", + "\n", + "\n", + "\n", + "N151274->N151278\n", + "\n", + "\n", + "INPUT_WORK\n", + "wg__tasks__emt1_s_0__inputs__atoms__property__value\n", + "\n", + "\n", + "\n", + "N151283\n", + "\n", + "PythonJob<emt1_s_0> (151283)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151274->N151283\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__atoms\n", + "\n", + "\n", + "\n", + "N151275->N151278\n", + "\n", + "\n", + "INPUT_WORK\n", + "wg__tasks__emt1_s_1__inputs__atoms__property__value\n", + "\n", + "\n", + "\n", + "N151288\n", + "\n", + "PythonJob<emt1_s_1> (151288)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151275->N151288\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__atoms\n", + "\n", + "\n", + "\n", + "N151276->N151278\n", + "\n", + "\n", + "INPUT_WORK\n", + "wg__tasks__emt1_s_2__inputs__atoms__property__value\n", + "\n", + "\n", + "\n", + "N151293\n", + "\n", + "PythonJob<emt1_s_2> (151293)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151276->N151293\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__atoms\n", + "\n", + "\n", + "\n", + "N151277->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__volumes\n", + "\n", + "\n", + "\n", + "N151278->N151283\n", + "\n", + 
"\n", + "CALL_CALC\n", + "emt1_s_0\n", + "\n", + "\n", + "\n", + "N151278->N151288\n", + "\n", + "\n", + "CALL_CALC\n", + "emt1_s_1\n", + "\n", + "\n", + "\n", + "N151278->N151293\n", + "\n", + "\n", + "CALL_CALC\n", + "emt1_s_2\n", + "\n", + "\n", + "\n", + "N151303\n", + "\n", + "Dict (151303)\n", + "\n", + "\n", + "\n", + "N151278->N151303\n", + "\n", + "\n", + "RETURN\n", + "results__s_0\n", + "\n", + "\n", + "\n", + "N151304\n", + "\n", + "Dict (151304)\n", + "\n", + "\n", + "\n", + "N151278->N151304\n", + "\n", + "\n", + "RETURN\n", + "results__s_1\n", + "\n", + "\n", + "\n", + "N151305\n", + "\n", + "Dict (151305)\n", + "\n", + "\n", + "\n", + "N151278->N151305\n", + "\n", + "\n", + "RETURN\n", + "results__s_2\n", + "\n", + "\n", + "\n", + "N151306\n", + "\n", + "Int (151306)\n", + "\n", + "\n", + "\n", + "N151278->N151306\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151297\n", + "\n", + "RemoteData (151297)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151283->N151297\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151300\n", + "\n", + "FolderData (151300)\n", + "\n", + "\n", + "\n", + "N151283->N151300\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151283->N151303\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151298\n", + "\n", + "RemoteData (151298)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151288->N151298\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151301\n", + "\n", + "FolderData (151301)\n", + "\n", + "\n", + "\n", + "N151288->N151301\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151288->N151304\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151299\n", + "\n", + "RemoteData (151299)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151293->N151299\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151302\n", + "\n", + "FolderData (151302)\n", + "\n", + "\n", + "\n", + "N151293->N151302\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151293->N151305\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151303->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__emt_results__s_0\n", + "\n", + "\n", + "\n", + "N151304->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__emt_results__s_1\n", + "\n", + "\n", + "\n", + "N151305->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__emt_results__s_2\n", + "\n", + "\n", + "\n", + "N151313\n", + "\n", + "RemoteData (151313)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151311->N151313\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151314\n", + "\n", + "FolderData (151314)\n", + "\n", + "\n", + "\n", + "N151311->N151314\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151315\n", + "\n", + "Dict (151315)\n", + "\n", + "\n", + "\n", + "N151311->N151315\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph.utils import generate_node_graph\n", + "\n", + "#------------------------- Generate node graph -------------------\n", + "generate_node_graph(wg.pk)" + ] + }, + { + "cell_type": "markdown", + "id": "a1cbc140", + "metadata": 
{}, + "source": [ + "## Retrieve additional files from the remote computer\n", + "Sometimes, one may want to retrieve additional files from the remote computer after the job has finished. For example, one may want to retrieve the output files generated by the `pw.x` calculation in Quantum ESPRESSO.\n", + "\n", + "One can use the `additional_retrieve_list` parameter to specify which files should be retrieved from the working directory and stored in the local repository after the job has finished" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e698190c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151317\n", + "File in the local repository: ['_scheduler-stderr.txt', '_scheduler-stdout.txt', 'aiida.out', 'result.txt', 'results.pickle']\n" + ] + } + ], + "source": [ + "from aiida_workgraph import WorkGraph\n", + "\n", + "def add(x, y):\n", + " z = x + y\n", + " with open(\"result.txt\", \"w\") as f:\n", + " f.write(str(z))\n", + " return x + y\n", + "\n", + "wg = WorkGraph(\"test_PythonJob_retrieve_files\")\n", + "wg.add_task(\"PythonJob\", function=add, name=\"add\")\n", + "# ------------------------- Submit the calculation -------------------\n", + "wg.submit(\n", + " inputs={\n", + " \"add\": {\n", + " \"x\": 2,\n", + " \"y\": 3,\n", + " \"computer\": \"localhost\",\n", + " \"metadata\": {\n", + " \"options\": {\n", + " \"additional_retrieve_list\": [\"result.txt\"],\n", + " }\n", + " }\n", + " },\n", + " },\n", + " wait=True,\n", + ")\n", + "\n", + "# ------------------------- Print the output -------------------------\n", + "filenames = wg.tasks['add'].outputs['retrieved'].value.list_object_names()\n", + "print(\"File in the local repository: \", filenames)" + ] + }, + { + "cell_type": "markdown", + "id": "fe376995", + "metadata": {}, + "source": [ + "We can see that the `result.txt` file is retrieved from the remote computer and stored in the local repository.\n", + "\n", + "## Exit Code\n", + "\n", + "The `PythonJob` task includes a built-in output socket, `exit_code`, which serves as a mechanism for error handling and status reporting during task execution. This `exit_code` is an integer value where `0` indicates a successful completion, and any non-zero value signals that an error occurred.\n", + "\n", + "### How it Works:\n", + "When the function returns a dictionary with an `exit_code` key, the system automatically parses and uses this code to indicate the task's status. In the case of an error, the non-zero `exit_code` value helps identify the specific problem.\n", + "\n", + "\n", + "### Benefits of `exit_code`:\n", + "\n", + "1. **Error Reporting:** \n", + " If the task encounters an error, the `exit_code` can communicate the reason. This is helpful during process inspection to determine why a task failed.\n", + "\n", + "2. **Error Handling and Recovery:** \n", + " You can utilize `exit_code` to add specific error handlers for particular exit codes. 
This allows you to modify the task's parameters and restart it.\n", + "\n", + "\n", + "Below is an example Python function that uses `exit_code` to handle potential errors:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a96cbbcb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151330\n", + "exit status: 410\n", + "exit message: Sum is negative\n" + ] + } + ], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "\n", + "@task.pythonjob(outputs=[{\"name\": \"sum\"}])\n", + "def add(x: int, y: int) -> int:\n", + " sum = x + y\n", + " if sum < 0:\n", + " exit_code = {\"status\": 410, \"message\": \"Sum is negative\"}\n", + " return {\"sum\": sum, \"exit_code\": exit_code}\n", + " return {\"sum\": sum}\n", + "\n", + "wg = WorkGraph(\"test_PythonJob\")\n", + "wg.add_task(add, name=\"add\", x=1, y=-2)\n", + "wg.submit(wait=True)\n", + "\n", + "print(\"exit status: \", wg.tasks[\"add\"].node.exit_status)\n", + "print(\"exit message: \", wg.tasks[\"add\"].node.exit_message)" + ] + }, + { + "cell_type": "markdown", + "id": "8d4d935b", + "metadata": {}, + "source": [ + "In this example, the task failed with `exit_code = 410` due to the condition `Sum is negative`, which is also reflected in the state message.\n", + "\n", + "## Error-handling with `exit_code`\n", + "\n", + "One can register error handlers for specific exit codes to handle errors gracefully. This allows for customized error recovery strategies based on the specific error encountered.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d9ab42a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151342\n", + "exit status: 0\n", + "exit message: None\n" + ] + } + ], + "source": [ + "\n", + "def handle_negative_sum(task) -> str:\n", + " \"\"\"Handle the failure code 410 of the `add`.\n", + " Simply make the inputs positive by taking the absolute value.\n", + " \"\"\"\n", + " # modify task inputs\n", + " task.set({\"x\": abs(task.inputs[\"x\"].value),\n", + " \"y\": abs(task.inputs[\"y\"].value)})\n", + " \n", + " msg = \"Run error handler: handle_negative_sum.\"\n", + " return msg\n", + "\n", + "\n", + "@task.pythonjob(outputs=[{\"name\": \"sum\"}],\n", + " error_handlers=[{\"handler\": handle_negative_sum,\n", + " \"exit_codes\": [410],\n", + " \"max_retries\": 5}])\n", + "@task.pythonjob(outputs=[{\"name\": \"sum\"}])\n", + "def add(x: int, y: int) -> int:\n", + " sum = x + y\n", + " if sum < 0:\n", + " exit_code = {\"status\": 410, \"message\": \"Sum is negative\"}\n", + " return {\"sum\": sum, \"exit_code\": exit_code}\n", + " return {\"sum\": sum}\n", + "\n", + "wg = WorkGraph(\"test_PythonJob\")\n", + "wg.add_task(add, name=\"add1\", x=1, y=-2, computer=\"localhost\")\n", + "wg.submit(wait=True)\n", + "print(\"exit status: \", wg.tasks[\"add1\"].node.exit_status)\n", + "print(\"exit message: \", wg.tasks[\"add1\"].node.exit_message)" + ] + }, + { + "cell_type": "markdown", + "id": "0c100bb7", + "metadata": {}, + "source": [ + "We can confirm that the task first fails again with a 410. Then the WorkGraph restarts the task with the new inputs, and it finishes successfully. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e06bf489", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[22mWorkGraph<151342> Finished [0]\n", + " ├── PythonJob<151349> Finished [410]\n", + " └── PythonJob<151360> Finished [0]\u001b[0m\n" + ] + } + ], + "source": [ + "%verdi process status {wg.pk}" + ] + }, + { + "cell_type": "markdown", + "id": "682fec82", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Define your data serializer\n", + "Workgraph search data serializer from the `aiida.data` entry point by the module name and class name (e.g., `ase.atoms.Atoms`). \n", + "\n", + "In order to let the workgraph find the serializer, you must register the AiiDA data with the following format:\n", + "```\n", + "[project.entry-points.\"aiida.data\"]\n", + "abc.ase.atoms.Atoms = \"abc.xyz:MyAtomsData\"\n", + "```\n", + "This will register a data serializer for `ase.atoms.Atoms` data. `abc` is the plugin name, module name is `xyz`, and the AiiDA data class name is `AtomsData`. Learn how to create a AiiDA data [here](https://aiida.readthedocs.io/projects/aiida-core/en/stable/topics/data_types.html#adding-support-for-custom-data-types).\n", + "\n", + "\n", + "### Avoid duplicate data serializer\n", + "If you have multiple plugins that register the same data serializer, the workgraph will raise an error. You can avoid this by selecting the plugin that you want to use in the configuration file.\n", + "\n", + "```json\n", + "{\n", + " \"serializers\": {\n", + " \"ase.atoms.Atoms\": \"abc.ase.atoms.Atoms\"\n", + " },\n", + "}\n", + "```\n", + "\n", + "Save the configuration file as `workgraph.json` in the aiida configuration directory (by default, `~/.aiida` directory).\n", + "\n", + "\n", + "## Use PythonJob outside WorkGraph\n", + "One can use the `PythonJob` task outside the WorkGraph to run a Python function on a remote computer. 
For example, in a `WorkChain` or run a single `CalcJob` calculation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9a1fa5e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result: 3\n" + ] + } + ], + "source": [ + "from aiida import orm, load_profile\n", + "from aiida.engine import run_get_node\n", + "from aiida_workgraph.calculations.python import PythonJob\n", + "\n", + "load_profile()\n", + "\n", + "python_code = orm.load_code(\"python3@localhost\")\n", + "\n", + "def add(x, y):\n", + " return x + y\n", + "\n", + "result, node = run_get_node(PythonJob, code=python_code,\n", + " function=add,\n", + " function_kwargs = {\"x\": orm.Int(1), \"y\": orm.Int(2)},\n", + " function_outputs=[{\"name\": \"add\"}])\n", + "\n", + "print(\"Result: \", result[\"add\"].value)\n" + ] + }, + { + "cell_type": "markdown", + "id": "4fb22545", + "metadata": {}, + "source": [ + "You can see more details on any process, including its inputs and outputs, using the verdi command:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "86e74979", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[22mProperty Value\n", + "----------- ------------------------------------\n", + "type PythonJob\n", + "state Finished [0]\n", + "pk 151415\n", + "uuid ff25998c-98d9-4d56-995a-fe9ecd66468a\n", + "label PythonJob\n", + "description\n", + "ctime 2024-09-13 10:46:05.231456+02:00\n", + "mtime 2024-09-13 10:46:08.263554+02:00\n", + "computer [1] localhost\n", + "\n", + "Inputs PK Type\n", + "---------------- ------ ---------------\n", + "function_kwargs\n", + " x 151412 Int\n", + " y 151413 Int\n", + "code 42316 InstalledCode\n", + "function 151411 PickledFunction\n", + "function_outputs 151414 List\n", + "\n", + "Outputs PK Type\n", + "------------- ------ ----------\n", + "add 151419 Int\n", + "remote_folder 151417 RemoteData\n", + "retrieved 151418 FolderData\u001b[0m\n" + ] + } + ], + "source": [ + "%verdi process show {node.pk}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.4 ('scinode')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "vscode": { + "interpreter": { + "hash": "2f450c1ff08798c4974437dd057310afef0de414c25d1fd960ad375311c3f6ff" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorial/dft.ipynb b/docs/source/tutorial/dft.ipynb new file mode 100644 index 0000000..f462dd9 --- /dev/null +++ b/docs/source/tutorial/dft.ipynb @@ -0,0 +1,1128 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "22d177dc-6cfb-4de2-9509-f1eb45e10cf2", + "metadata": {}, + "source": [ + "# DFT Calculation with ASE Calculator\n", + "## Introduction\n", + "\n", + "The `PythonJob` is a built-in task that allows users to run Python functions on a remote computer. For instance, users can use ASE's calculator to run a DFT calculation on a remote computer directly. Users only need to write normal Python code, and the WorkGraph will handle the data transformation to AiiDA data.\n", + "\n", + "The following examples are running with [AiiDA-WorkGraph](https://aiida-workgraph.readthedocs.io/en/latest/)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "2174a45e", + "metadata": {}, + "source": [ + "## First Real-world Workflow: atomization energy of molecule\n", + "\n", + "The atomization energy, $\\Delta E$, of a molecule can be expressed as:\n", + "\n", + "$$\n", + "\\Delta E = n_{\\text{atom}} \\times E_{\\text{atom}} - E_{\\text{molecule}}\n", + "$$\n", + "\n", + "Where:\n", + "- $\\Delta E$ is the atomization energy of the molecule.\n", + "- $n_{\\text{atom}}$ is the number of atoms.\n", + "- $E_{\\text{atom}}$ is the energy of an isolated atom.\n", + "- $E_{\\text{molecule}}$ is the energy of the molecule.\n", + "\n", + "\n", + "### Define a task to calculate the energy of the atoms using EMT potential" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "713da634", + "metadata": {}, + "outputs": [], + "source": [ + "from aiida_workgraph import task, WorkGraph\n", + "\n", + "def emt(atoms):\n", + " from ase.calculators.emt import EMT\n", + " atoms.calc = EMT()\n", + " energy = atoms.get_potential_energy()\n", + " return energy\n", + "\n", + "\n", + "def atomization_energy(mol, energy_molecule, energy_atom):\n", + " energy = energy_atom*len(mol) - energy_molecule\n", + " return energy\n" + ] + }, + { + "cell_type": "markdown", + "id": "00a7531e", + "metadata": {}, + "source": [ + "### Define a workgraph\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a81fa9e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wg = WorkGraph(\"atomization_energy\")\n", + "pw_atom = wg.add_task(\"PythonJob\", function=emt, name=\"emt_atom\")\n", + "pw_mol = wg.add_task(\"PythonJob\", function=emt, name=\"emt_mol\")\n", + "wg.add_task(\"PythonJob\", function=atomization_energy, name=\"atomization_energy\",\n", + " energy_atom=pw_atom.outputs[\"result\"],\n", + " energy_molecule=pw_mol.outputs[\"result\"])\n", + "wg.to_html()" + ] + }, + { + "cell_type": "markdown", + "id": "b686f3ba", + "metadata": {}, + "source": [ + "### Prepare the inputs and submit the workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "11e3bca1-dda6-44e9-9585-54feeda7e7db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WorkGraph process created, PK: 151193\n", + "Energy of a N atom: 5.100\n", + "Energy of an un-relaxed N2 molecule: 0.549\n", + "Atomization energy: 9.651 eV\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N151193\n", + "\n", + "WorkGraph<atomization_energy> (151193)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151199\n", + "\n", + "PythonJob<emt_mol> (151199)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151193->N151199\n", + "\n", + "\n", + "CALL_CALC\n", + "emt_mol\n", + "\n", + "\n", + "\n", + "N151205\n", + "\n", + "PythonJob<emt_atom> (151205)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151193->N151205\n", + "\n", + "\n", + "CALL_CALC\n", + "emt_atom\n", + "\n", + "\n", + "\n", + "N151219\n", + "\n", + "PythonJob<atomization_energy> (151219)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151193->N151219\n", + "\n", + "\n", + "CALL_CALC\n", + "atomization_energy\n", + "\n", + "\n", + "\n", + "N151224\n", + "\n", + "Int (151224)\n", 
+ "\n", + "\n", + "\n", + "N151193->N151224\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151208\n", + "\n", + "RemoteData (151208)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151199->N151208\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151210\n", + "\n", + "FolderData (151210)\n", + "\n", + "\n", + "\n", + "N151199->N151210\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151212\n", + "\n", + "GeneralData (151212)\n", + "\n", + "\n", + "\n", + "N151199->N151212\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151209\n", + "\n", + "RemoteData (151209)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151205->N151209\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151211\n", + "\n", + "FolderData (151211)\n", + "\n", + "\n", + "\n", + "N151205->N151211\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151213\n", + "\n", + "GeneralData (151213)\n", + "\n", + "\n", + "\n", + "N151205->N151213\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151212->N151219\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__energy_molecule\n", + "\n", + "\n", + "\n", + "N151213->N151219\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__energy_atom\n", + "\n", + "\n", + "\n", + "N151221\n", + "\n", + "RemoteData (151221)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151219->N151221\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151222\n", + "\n", + "FolderData (151222)\n", + "\n", + "\n", + "\n", + "N151219->N151222\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151223\n", + "\n", + "GeneralData (151223)\n", + "\n", + "\n", + "\n", + "N151219->N151223\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ase.build import molecule\n", + "from ase import Atoms\n", + "\n", + "load_profile()\n", + "\n", + "# create input structure\n", + "n_atom = Atoms(\"N\", pbc=True)\n", + "n_atom.center(vacuum=5.0)\n", + "n2_molecule = molecule(\"N2\", pbc=True)\n", + "n2_molecule.center(vacuum=5.0)\n", + "\n", + "\n", + "#------------------------- Set the inputs -------------------------\n", + "wg.tasks[\"emt_atom\"].set({\"atoms\": n_atom, \"computer\": \"localhost\"})\n", + "wg.tasks[\"emt_mol\"].set({\"atoms\": n2_molecule, \"computer\": \"localhost\"})\n", + "wg.tasks[\"atomization_energy\"].set({\"mol\": n2_molecule, \"computer\": \"localhost\"})\n", + "#------------------------- Submit the calculation -------------------\n", + "wg.submit(wait=True, timeout=200)\n", + "#------------------------- Print the output -------------------------\n", + "print('Energy of a N atom: {:0.3f}'.format(wg.tasks['emt_atom'].outputs[\"result\"].value.value))\n", + "print('Energy of an un-relaxed N2 molecule: {:0.3f}'.format(wg.tasks['emt_mol'].outputs[\"result\"].value.value))\n", + "print('Atomization energy: {:0.3f} eV'.format(wg.tasks['atomization_energy'].outputs[\"result\"].value.value))\n", + "#------------------------- Generate node graph -------------------\n", + "generate_node_graph(wg.pk)\n" + ] + }, + { + "cell_type": "markdown", + "id": "a04617ca", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Second Real-world Workflow: Equation of state (EOS) 
WorkGraph\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "dd00841a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "from ase.build import bulk\n", + "from ase import Atoms\n", + "from aiida import load_profile\n", + "\n", + "load_profile()\n", + "\n", + "@task(outputs=[{\"name\": \"scaled_atoms\", \"identifier\": \"workgraph.namespace\"},\n", + " {\"name\": \"volumes\"}]\n", + ")\n", + "def generate_scaled_atoms(atoms: Atoms, scales: list) -> dict:\n", + " \"\"\"Scale the structure by the given scales.\"\"\"\n", + " volumes = {}\n", + " scaled_atoms = {}\n", + " for i in range(len(scales)):\n", + " atoms1 = atoms.copy()\n", + " atoms1.set_cell(atoms.cell * scales[i], scale_atoms=True)\n", + " scaled_atoms[f\"s_{i}\"] = atoms1\n", + " volumes[f\"s_{i}\"] = atoms1.get_volume()\n", + " return {\"scaled_atoms\": scaled_atoms, \"volumes\": volumes}\n", + "\n", + "@task()\n", + "def emt(atoms):\n", + " from ase.calculators.emt import EMT\n", + " atoms.calc = EMT()\n", + " energy = atoms.get_potential_energy()\n", + " return {\"energy\": energy}\n", + "\n", + "# Output result from context to the output socket\n", + "@task.graph_builder(outputs=[{\"name\": \"results\", \"from\": \"context.results\"}])\n", + "def calculate_enegies(scaled_atoms):\n", + " \"\"\"Run the scf calculation for each structure.\"\"\"\n", + " from aiida_workgraph import WorkGraph\n", + " wg = WorkGraph()\n", + " for key, atoms in scaled_atoms.items():\n", + " emt1 = wg.add_task(\"PythonJob\", function=emt, name=f\"emt1_{key}\", atoms=atoms)\n", + " emt1.set({\"computer\": \"localhost\"})\n", + " # save the output parameters to the context\n", + " emt1.set_context({\"result\": f\"results.{key}\"})\n", + " return wg\n", + "\n", + "\n", + "@task()\n", + "def fit_eos(volumes: dict, emt_results: dict) -> dict:\n", + " \"\"\"Fit the EOS of the data.\"\"\"\n", + " from ase.eos import EquationOfState\n", + " from ase.units import kJ\n", + "\n", + " volumes_list = []\n", + " energies = []\n", + " for key, data in emt_results.items():\n", + " energy = data[\"energy\"]\n", + " energies.append(energy)\n", + " volumes_list.append(volumes[key])\n", + " #\n", + " eos = EquationOfState(volumes_list, energies)\n", + " v0, e0, B = eos.fit()\n", + " # convert B to GPa\n", + " B = B / kJ * 1.0e24\n", + " eos = {\"energy unit\": \"eV\", \"v0\": v0, \"e0\": e0, \"B\": B}\n", + " return eos\n", + "\n", + "atoms = bulk(\"Au\", cubic=True)\n", + "\n", + "wg = WorkGraph(\"pythonjob_eos_emt\")\n", + "scale_atoms_task = wg.add_task(\"PythonJob\",\n", + " function=generate_scaled_atoms,\n", + " name=\"scale_atoms\",\n", + " atoms=atoms,\n", + " )\n", + " # -------- calculate_enegies -----------\n", + "calculate_enegies_task = wg.add_task(calculate_enegies,\n", + " name=\"calculate_enegies\",\n", + " scaled_atoms=scale_atoms_task.outputs[\"scaled_atoms\"],\n", + " )\n", + " # -------- fit_eos -----------\n", + "wg.add_task(\"PythonJob\",\n", + " function=fit_eos,\n", + " name=\"fit_eos\",\n", + " volumes=scale_atoms_task.outputs[\"volumes\"],\n", + " emt_results=calculate_enegies_task.outputs[\"results\"],\n", + " )\n", + "wg.to_html()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3d8072ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "WorkGraph process created, PK: 151263\n", + "The fitted EOS parameters are:\n" + ] + }, + { + "data": { + "text/plain": [ + "{'B': 167.61300824791,\n", + " 'e0': 0.006458727465855,\n", + " 'v0': 67.197735262521,\n", + " 'energy unit': 'eV'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "wg.submit(\n", + " inputs={\"scale_atoms\": {\"atoms\": atoms,\n", + " \"scales\": [0.95, 1.0, 1.05],\n", + " \"computer\": \"localhost\"},\n", + " \"fit_eos\": {\"computer\": \"localhost\"}},\n", + " wait=True,\n", + " )\n", + "\n", + "print(\"The fitted EOS parameters are:\")\n", + "wg.tasks[\"fit_eos\"].outputs[\"result\"].value.value\n" + ] + }, + { + "cell_type": "markdown", + "id": "8321bb88", + "metadata": {}, + "source": [ + "Generate the node graph and check the data provenance." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "1f802430", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N151263\n", + "\n", + "WorkGraph<pythonjob_eos_emt> (151263)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151270\n", + "\n", + "PythonJob<scale_atoms> (151270)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151263->N151270\n", + "\n", + "\n", + "CALL_CALC\n", + "scale_atoms\n", + "\n", + "\n", + "\n", + "N151278\n", + "\n", + "WorkGraph<calculate_enegies> (151278)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151263->N151278\n", + "\n", + "\n", + "CALL_WORK\n", + "calculate_enegies\n", + "\n", + "\n", + "\n", + "N151311\n", + "\n", + "PythonJob<fit_eos> (151311)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151263->N151311\n", + "\n", + "\n", + "CALL_CALC\n", + "fit_eos\n", + "\n", + "\n", + "\n", + "N151316\n", + "\n", + "Int (151316)\n", + "\n", + "\n", + "\n", + "N151263->N151316\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151272\n", + "\n", + "RemoteData (151272)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151270->N151272\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151273\n", + "\n", + "FolderData (151273)\n", + "\n", + "\n", + "\n", + "N151270->N151273\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151274\n", + "\n", + "AtomsData (151274)\n", + "\n", + "\n", + "\n", + "N151270->N151274\n", + "\n", + "\n", + "CREATE\n", + "scaled_atoms__s_0\n", + "\n", + "\n", + "\n", + "N151275\n", + "\n", + "AtomsData (151275)\n", + "\n", + "\n", + "\n", + "N151270->N151275\n", + "\n", + "\n", + "CREATE\n", + "scaled_atoms__s_1\n", + "\n", + "\n", + "\n", + "N151276\n", + "\n", + "AtomsData (151276)\n", + "\n", + "\n", + "\n", + "N151270->N151276\n", + "\n", + "\n", + "CREATE\n", + "scaled_atoms__s_2\n", + "\n", + "\n", + "\n", + "N151277\n", + "\n", + "Dict (151277)\n", + "\n", + "\n", + "\n", + "N151270->N151277\n", + "\n", + "\n", + "CREATE\n", + "volumes\n", + "\n", + "\n", + "\n", + "N151274->N151278\n", + "\n", + "\n", + "INPUT_WORK\n", + "wg__tasks__emt1_s_0__inputs__atoms__property__value\n", + "\n", + "\n", + "\n", + "N151283\n", + "\n", + "PythonJob<emt1_s_0> (151283)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151274->N151283\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__atoms\n", + "\n", + "\n", + "\n", + 
"N151275->N151278\n", + "\n", + "\n", + "INPUT_WORK\n", + "wg__tasks__emt1_s_1__inputs__atoms__property__value\n", + "\n", + "\n", + "\n", + "N151288\n", + "\n", + "PythonJob<emt1_s_1> (151288)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151275->N151288\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__atoms\n", + "\n", + "\n", + "\n", + "N151276->N151278\n", + "\n", + "\n", + "INPUT_WORK\n", + "wg__tasks__emt1_s_2__inputs__atoms__property__value\n", + "\n", + "\n", + "\n", + "N151293\n", + "\n", + "PythonJob<emt1_s_2> (151293)\n", + "State: finished\n", + "Exit Code: 0\n", + "\n", + "\n", + "\n", + "N151276->N151293\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__atoms\n", + "\n", + "\n", + "\n", + "N151277->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__volumes\n", + "\n", + "\n", + "\n", + "N151278->N151283\n", + "\n", + "\n", + "CALL_CALC\n", + "emt1_s_0\n", + "\n", + "\n", + "\n", + "N151278->N151288\n", + "\n", + "\n", + "CALL_CALC\n", + "emt1_s_1\n", + "\n", + "\n", + "\n", + "N151278->N151293\n", + "\n", + "\n", + "CALL_CALC\n", + "emt1_s_2\n", + "\n", + "\n", + "\n", + "N151303\n", + "\n", + "Dict (151303)\n", + "\n", + "\n", + "\n", + "N151278->N151303\n", + "\n", + "\n", + "RETURN\n", + "results__s_0\n", + "\n", + "\n", + "\n", + "N151304\n", + "\n", + "Dict (151304)\n", + "\n", + "\n", + "\n", + "N151278->N151304\n", + "\n", + "\n", + "RETURN\n", + "results__s_1\n", + "\n", + "\n", + "\n", + "N151305\n", + "\n", + "Dict (151305)\n", + "\n", + "\n", + "\n", + "N151278->N151305\n", + "\n", + "\n", + "RETURN\n", + "results__s_2\n", + "\n", + "\n", + "\n", + "N151306\n", + "\n", + "Int (151306)\n", + "\n", + "\n", + "\n", + "N151278->N151306\n", + "\n", + "\n", + "RETURN\n", + "execution_count\n", + "\n", + "\n", + "\n", + "N151297\n", + "\n", + "RemoteData (151297)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151283->N151297\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151300\n", + "\n", + "FolderData (151300)\n", + "\n", + "\n", + "\n", + "N151283->N151300\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151283->N151303\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151298\n", + "\n", + "RemoteData (151298)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151288->N151298\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151301\n", + "\n", + "FolderData (151301)\n", + "\n", + "\n", + "\n", + "N151288->N151301\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151288->N151304\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151299\n", + "\n", + "RemoteData (151299)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151293->N151299\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151302\n", + "\n", + "FolderData (151302)\n", + "\n", + "\n", + "\n", + "N151293->N151302\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151293->N151305\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n", + "N151303->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__emt_results__s_0\n", + "\n", + "\n", + "\n", + "N151304->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__emt_results__s_1\n", + "\n", + "\n", + "\n", + "N151305->N151311\n", + "\n", + "\n", + "INPUT_CALC\n", + "function_kwargs__emt_results__s_2\n", + "\n", + "\n", + "\n", + 
"N151313\n", + "\n", + "RemoteData (151313)\n", + "@localhost\n", + "\n", + "\n", + "\n", + "N151311->N151313\n", + "\n", + "\n", + "CREATE\n", + "remote_folder\n", + "\n", + "\n", + "\n", + "N151314\n", + "\n", + "FolderData (151314)\n", + "\n", + "\n", + "\n", + "N151311->N151314\n", + "\n", + "\n", + "CREATE\n", + "retrieved\n", + "\n", + "\n", + "\n", + "N151315\n", + "\n", + "Dict (151315)\n", + "\n", + "\n", + "\n", + "N151311->N151315\n", + "\n", + "\n", + "CREATE\n", + "result\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from aiida_workgraph.utils import generate_node_graph\n", + "\n", + "#------------------------- Generate node graph -------------------\n", + "generate_node_graph(wg.pk)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aiida", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorial/html/atomization_energy.html b/docs/source/tutorial/html/atomization_energy.html new file mode 100644 index 0000000..6f0bd6a --- /dev/null +++ b/docs/source/tutorial/html/atomization_energy.html @@ -0,0 +1,290 @@ + + + + + + + Rete.js with React in Vanilla JS + + + + + + + + + + + + + + + + + + + + + +
+ + + diff --git a/docs/source/tutorial/html/pythonjob_eos_emt.html b/docs/source/tutorial/html/pythonjob_eos_emt.html new file mode 100644 index 0000000..6a4a448 --- /dev/null +++ b/docs/source/tutorial/html/pythonjob_eos_emt.html @@ -0,0 +1,290 @@ + + + + + + + Rete.js with React in Vanilla JS + + + + + + + + + + + + + + + + + + + + + +
+ + + diff --git a/docs/source/tutorial/index.rst b/docs/source/tutorial/index.rst new file mode 100644 index 0000000..be42331 --- /dev/null +++ b/docs/source/tutorial/index.rst @@ -0,0 +1,11 @@ + +Tutorials +=========================================== + +In these tutorials, you will see several examples from real applications. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + dft diff --git a/examples/test_add.py b/examples/test_add.py new file mode 100644 index 0000000..2baf48a --- /dev/null +++ b/examples/test_add.py @@ -0,0 +1,15 @@ +from aiida import load_profile +from aiida.engine import run +from aiida_pythonjob import PythonJob, prepare_pythonjob_inputs + +load_profile() + + +def add(x, y): + return x + y + + +inputs = prepare_pythonjob_inputs( + add, function_inputs={"x": 1, "y": 2}, function_outputs=[{"name": "add"}], computer="localhost" +) +run(PythonJob, inputs=inputs) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3852d1c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,149 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +# See https://www.python.org/dev/peps/pep-0621/ +name = "aiida-pythonjob" +dynamic = ["version"] # read from src/aiida_pythonjob/__init__.py +description = "Run Python functions on a remote computer." +authors = [{name = "Xing Wang", email = "xingwang1991@gmail.com"}] +readme = "README.md" +license = {file = "LICENSE"} +classifiers = [ + "Programming Language :: Python", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Development Status :: 3 - Alpha", + "Framework :: AiiDA" +] +keywords = ["aiida", "plugin"] +requires-python = ">=3.9" +dependencies = [ + "aiida-core>=2.3,<3", + "cloudpickle", + "voluptuous" +] + +[project.optional-dependencies] +pre-commit = [ + 'pre-commit~=3.5', +] +docs = [ + "sphinx_rtd_theme", + "sphinx", + "sphinxcontrib-contentui", + "sphinxcontrib-details-directive", + "sphinx-gallery", + "furo", + "markupsafe<2.1", + "nbsphinx" +] + +[project.urls] +Source = "https://github.com/aiidateam/aiida-pythonjob" + +[project.entry-points."aiida.data"] +"pythonjob.pickled_data" = "aiida_pythonjob.data.pickled_data:PickledData" +"pythonjob.pickled_function" = "aiida_pythonjob.data.pickled_function:PickledFunction" + +[project.entry-points."aiida.calculations"] +"pythonjob.pythonjob" = "aiida_pythonjob.calculations.pythonjob:PythonJob" + +[project.entry-points."aiida.parsers"] +"pythonjob.pythonjob" = "aiida_pythonjob.parsers.pythonjob:PythonJobParser" + + +[tool.pytest.ini_options] +# Configuration for [pytest](https://docs.pytest.org) +python_files = "test_*.py example_*.py" +addopts = "--pdbcls=IPython.terminal.debugger:TerminalPdb" +filterwarnings = [ + "ignore::DeprecationWarning:aiida:", + "ignore:Creating AiiDA configuration folder:", + "ignore::DeprecationWarning:plumpy:", + "ignore::DeprecationWarning:yaml:", +] + + +[tool.coverage.run] +# Configuration of [coverage.py](https://coverage.readthedocs.io) +# reporting which lines of your plugin are covered by tests +source = ["src/aiida_pythonjob"] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +ignore = [ + 'F403', # Star imports unable to detect undefined names + 'F405', # Import may be undefined or defined from star imports + 'PLR0911', # Too many return statements + 'PLR0912', # Too many branches + 'PLR0913', # Too many arguments in function definition + 'PLR0915', # Too many statements + 'PLR2004', # Magic value used in 
comparison + 'RUF005', # Consider iterable unpacking instead of concatenation + 'RUF012' # Mutable class attributes should be annotated with `typing.ClassVar` +] +select = [ + 'E', # pydocstyle + 'W', # pydocstyle + 'F', # pyflakes + 'I', # isort + 'N', # pep8-naming + 'PLC', # pylint-convention + 'PLE', # pylint-error + 'PLR', # pylint-refactor + 'PLW', # pylint-warning + 'RUF' # ruff +] + +## Hatch configurations + +[tool.hatch.version] +path = "src/aiida_pythonjob/__init__.py" + +[tool.hatch.envs.hatch-test] +dependencies = [ + 'pgtest~=1.3,>=1.3.1', + 'coverage~=7.0', + 'pytest~=7.0', + "pytest-cov~=4.1", + "ipdb" +] + +[tool.hatch.envs.hatch-test.scripts] +# These are the efault scripts provided by hatch. +# The have been copied to make the execution more transparent + +# This command is run with the command `hatch test` +run = "pytest{env:HATCH_TEST_ARGS:} {args}" +# The three commands below are run with the command `hatch test --coverage` +run-cov = "coverage run -m pytest{env:HATCH_TEST_ARGS:} {args}" +cov-combine = "coverage combine" +cov-report = "coverage report" + +[[tool.hatch.envs.hatch-test.matrix]] +python = ["3.9", "3.10", "3.11", "3.12"] + +[tool.hatch.envs.hatch-static-analysis] +dependencies = ["ruff==0.4.3"] + +[tool.hatch.envs.hatch-static-analysis.scripts] +# Fixes are executed with `hatch fmt`. +# Checks are executed with `hatch fmt --check`. + +format-check = "ruff format --check --config pyproject.toml {args:.}" +format-fix = "ruff format --config pyproject.toml {args:.}" +lint-check = "ruff check --config pyproject.toml {args:.}" +lint-fix = "ruff check --config pyproject.toml --fix --exit-non-zero-on-fix --show-fixes {args:.}" + +[tool.hatch.envs.docs] +features = ["docs"] + +[tool.hatch.envs.docs.scripts] +build = [ + "make -C docs" +] diff --git a/src/aiida_pythonjob/__init__.py b/src/aiida_pythonjob/__init__.py new file mode 100644 index 0000000..9441320 --- /dev/null +++ b/src/aiida_pythonjob/__init__.py @@ -0,0 +1,16 @@ +"""AiiDA plugin that run Python function on remote computers.""" + +__version__ = "0.1.0" + +from .calculations import PythonJob +from .data import PickledData, PickledFunction +from .launch import prepare_pythonjob_inputs +from .parsers import PythonJobParser + +__all__ = ( + "PythonJob", + "PickledData", + "PickledFunction", + "prepare_pythonjob_inputs", + "PythonJobParser", +) diff --git a/src/aiida_pythonjob/calculations/__init__.py b/src/aiida_pythonjob/calculations/__init__.py new file mode 100644 index 0000000..27f1862 --- /dev/null +++ b/src/aiida_pythonjob/calculations/__init__.py @@ -0,0 +1,3 @@ +from .pythonjob import PythonJob + +__all__ = ("PythonJob",) diff --git a/src/aiida_pythonjob/calculations/pythonjob.py b/src/aiida_pythonjob/calculations/pythonjob.py new file mode 100644 index 0000000..d717c93 --- /dev/null +++ b/src/aiida_pythonjob/calculations/pythonjob.py @@ -0,0 +1,306 @@ +"""Calcjob to run a Python function on a remote computer.""" + +from __future__ import annotations + +import pathlib +import typing as t + +from aiida.common.datastructures import CalcInfo, CodeInfo +from aiida.common.extendeddicts import AttributeDict +from aiida.common.folders import Folder +from aiida.engine import CalcJob, CalcJobProcessSpec +from aiida.orm import ( + Data, + FolderData, + List, + RemoteData, + SinglefileData, + Str, + to_aiida_type, +) + +from aiida_pythonjob.data.pickled_function import PickledFunction, to_pickled_function + +__all__ = ("PythonJob",) + + +class PythonJob(CalcJob): + """Calcjob to run a Python function on a 
remote computer.""" + + _internal_retrieve_list = [] + _retrieve_singlefile_list = [] + _retrieve_temporary_list = [] + + _DEFAULT_INPUT_FILE = "script.py" + _DEFAULT_OUTPUT_FILE = "aiida.out" + _DEFAULT_PARENT_FOLDER_NAME = "./parent_folder/" + + @classmethod + def define(cls, spec: CalcJobProcessSpec) -> None: # type: ignore[override] + """Define the process specification, including its inputs, outputs and known exit codes. + + :param spec: the calculation job process spec to define. + """ + super().define(spec) + spec.input( + "function", + valid_type=PickledFunction, + serializer=to_pickled_function, + required=False, + ) + spec.input( + "function_source_code", + valid_type=Str, + serializer=to_aiida_type, + required=False, + ) + spec.input("function_name", valid_type=Str, serializer=to_aiida_type, required=False) + spec.input("process_label", valid_type=Str, serializer=to_aiida_type, required=False) + spec.input_namespace( + "function_inputs", valid_type=Data, required=False + ) # , serializer=serialize_to_aiida_nodes) + spec.input( + "function_outputs", + valid_type=List, + default=lambda: List(), + required=False, + serializer=to_aiida_type, + help="The information of the output ports", + ) + spec.input( + "parent_folder", + valid_type=(RemoteData, FolderData, SinglefileData), + required=False, + help="Use a local or remote folder as parent folder (for restarts and similar)", + ) + spec.input( + "parent_folder_name", + valid_type=Str, + required=False, + serializer=to_aiida_type, + help="""Default name of the subfolder that you want to create in the working directory, + in which you want to place the files taken from parent_folder""", + ) + spec.input( + "parent_output_folder", + valid_type=Str, + default=None, + required=False, + serializer=to_aiida_type, + help="Name of the subfolder inside 'parent_folder' from which you want to copy the files", + ) + spec.input_namespace( + "upload_files", + valid_type=(FolderData, SinglefileData), + required=False, + help="The folder/files to upload", + ) + spec.input_namespace( + "copy_files", + valid_type=(RemoteData,), + required=False, + help="The folder/files to copy from the remote computer", + ) + spec.input( + "additional_retrieve_list", + valid_type=List, + default=None, + required=False, + serializer=to_aiida_type, + help="The names of the files to retrieve", + ) + spec.outputs.dynamic = True + # set default options (optional) + spec.inputs["metadata"]["options"]["parser_name"].default = "pythonjob.pythonjob" + spec.inputs["metadata"]["options"]["input_filename"].default = "script.py" + spec.inputs["metadata"]["options"]["output_filename"].default = "aiida.out" + spec.inputs["metadata"]["options"]["resources"].default = { + "num_machines": 1, + "num_mpiprocs_per_machine": 1, + } + # start exit codes - marker for docs + spec.exit_code( + 310, + "ERROR_READING_OUTPUT_FILE", + invalidates_cache=True, + message="The output file could not be read.", + ) + spec.exit_code( + 320, + "ERROR_INVALID_OUTPUT", + invalidates_cache=True, + message="The output file contains invalid output.", + ) + spec.exit_code( + 321, + "ERROR_RESULT_OUTPUT_MISMATCH", + invalidates_cache=True, + message="The number of results does not match the number of outputs.", + ) + + def _build_process_label(self) -> str: + """Use the function name as the process label. + + :returns: The process label to use for ``ProcessNode`` instances. 
+ """ + if "process_label" in self.inputs: + return self.inputs.process_label.value + else: + data = self.get_function_data() + return f"PythonJob<{data['name']}>" + + def on_create(self) -> None: + """Called when a Process is created.""" + + super().on_create() + self.node.label = self._build_process_label() + + def get_function_data(self) -> dict[str, t.Any]: + """Get the function data. + + :returns: The function data. + """ + if "function" in self.inputs: + metadata = self.inputs.function.metadata + metadata["source_code"] = metadata["import_statements"] + "\n" + metadata["source_code_without_decorator"] + return metadata + else: + return { + "source_code": self.inputs.function_source_code.value, + "name": self.inputs.function_name.value, + } + + def prepare_for_submission(self, folder: Folder) -> CalcInfo: + """Prepare the calculation for submission. + + 1) Write the python script to the folder. + 2) Write the inputs to a pickle file and save it to the folder. + + :param folder: A temporary folder on the local file system. + :returns: A :class:`aiida.common.datastructures.CalcInfo` instance. + """ + import cloudpickle as pickle + + dirpath = pathlib.Path(folder._abspath) + inputs: dict[str, t.Any] + + if self.inputs.function_inputs: + inputs = dict(self.inputs.function_inputs) + else: + inputs = {} + if "parent_folder_name" in self.inputs: + parent_folder_name = self.inputs.parent_folder_name.value + else: + parent_folder_name = self._DEFAULT_PARENT_FOLDER_NAME + function_data = self.get_function_data() + # create python script to run the function + script = f""" +import pickle + +# define the function +{function_data["source_code"]} + +# load the inputs from the pickle file +with open('inputs.pickle', 'rb') as handle: + inputs = pickle.load(handle) + +# run the function +result = {function_data["name"]}(**inputs) +# save the result as a pickle file +with open('results.pickle', 'wb') as handle: + pickle.dump(result, handle) +""" + # write the script to the folder + with folder.open(self.options.input_filename, "w", encoding="utf8") as handle: + handle.write(script) + # symlink = settings.pop('PARENT_FOLDER_SYMLINK', False) + symlink = True + + remote_copy_list = [] + local_copy_list = [] + remote_symlink_list = [] + remote_list = remote_symlink_list if symlink else remote_copy_list + + source = self.inputs.get("parent_folder", None) + + if source is not None: + if isinstance(source, RemoteData): + dirpath = pathlib.Path(source.get_remote_path()) + if self.inputs.parent_output_folder is not None: + dirpath = pathlib.Path(source.get_remote_path()) / self.inputs.parent_output_folder.value + remote_list.append( + ( + source.computer.uuid, + str(dirpath), + parent_folder_name, + ) + ) + elif isinstance(source, FolderData): + dirname = self.inputs.parent_output_folder.value if self.inputs.parent_output_folder is not None else "" + local_copy_list.append((source.uuid, dirname, parent_folder_name)) + elif isinstance(source, SinglefileData): + local_copy_list.append((source.uuid, source.filename, source.filename)) + if "upload_files" in self.inputs: + upload_files = self.inputs.upload_files + for key, source in upload_files.items(): + # replace "_dot_" with "." in the key + new_key = key.replace("_dot_", ".") + if isinstance(source, FolderData): + local_copy_list.append((source.uuid, "", new_key)) + elif isinstance(source, SinglefileData): + local_copy_list.append((source.uuid, source.filename, source.filename)) + else: + raise ValueError( + f"""Input folder/file: {source} is not supported. 
+Only AiiDA SinglefileData and FolderData are allowed.""" + ) + if "copy_files" in self.inputs: + copy_files = self.inputs.copy_files + for key, source in copy_files.items(): + # replace "_dot_" with "." in the key + new_key = key.replace("_dot_", ".") + dirpath = pathlib.Path(source.get_remote_path()) + remote_list.append((source.computer.uuid, str(dirpath), new_key)) + # create pickle file for the inputs + input_values = {} + for key, value in inputs.items(): + if isinstance(value, Data) and hasattr(value, "value"): + # get the value of the pickled data + input_values[key] = value.value + # TODO: should check this recursively + elif isinstance(value, (AttributeDict, dict)): + # if the value is an AttributeDict, use recursively + input_values[key] = {k: v.value for k, v in value.items()} + else: + raise ValueError( + f"Input data {value} is not supported. Only AiiDA data Node with a value attribute is allowed. " + ) + # save the value as a pickle file, the path is absolute + filename = "inputs.pickle" + dirpath = pathlib.Path(folder._abspath) + with folder.open(filename, "wb") as handle: + pickle.dump(input_values, handle) + # create a singlefiledata object for the pickled data + file_data = SinglefileData(file=f"{dirpath}/{filename}") + file_data.store() + local_copy_list.append((file_data.uuid, file_data.filename, filename)) + + codeinfo = CodeInfo() + codeinfo.stdin_name = self.options.input_filename + codeinfo.stdout_name = self.options.output_filename + codeinfo.code_uuid = self.inputs.code.uuid + + calcinfo = CalcInfo() + calcinfo.codes_info = [codeinfo] + calcinfo.local_copy_list = local_copy_list + calcinfo.remote_copy_list = remote_copy_list + calcinfo.remote_symlink_list = remote_symlink_list + calcinfo.retrieve_list = ["results.pickle", self.options.output_filename] + if self.inputs.additional_retrieve_list is not None: + calcinfo.retrieve_list += self.inputs.additional_retrieve_list.get_list() + calcinfo.retrieve_list += self._internal_retrieve_list + + calcinfo.retrieve_temporary_list = self._retrieve_temporary_list + calcinfo.retrieve_singlefile_list = self._retrieve_singlefile_list + + return calcinfo diff --git a/src/aiida_pythonjob/config.py b/src/aiida_pythonjob/config.py new file mode 100644 index 0000000..37e9ee6 --- /dev/null +++ b/src/aiida_pythonjob/config.py @@ -0,0 +1,14 @@ +import json + +from aiida.manage.configuration.settings import AIIDA_CONFIG_FOLDER + + +def load_config() -> dict: + """Load the configuration from the config file.""" + config_file_path = AIIDA_CONFIG_FOLDER / "pythonjob.json" + try: + with config_file_path.open("r") as f: + config = json.load(f) + except FileNotFoundError: + config = {} + return config diff --git a/src/aiida_pythonjob/data/__init__.py b/src/aiida_pythonjob/data/__init__.py new file mode 100644 index 0000000..2a00bfc --- /dev/null +++ b/src/aiida_pythonjob/data/__init__.py @@ -0,0 +1,4 @@ +from .pickled_data import PickledData +from .pickled_function import PickledFunction + +__all__ = ("PickledData", "PickledFunction") diff --git a/src/aiida_pythonjob/data/pickled_data.py b/src/aiida_pythonjob/data/pickled_data.py new file mode 100644 index 0000000..a0afdaa --- /dev/null +++ b/src/aiida_pythonjob/data/pickled_data.py @@ -0,0 +1,87 @@ +"""`Data` sub class to represent any data using pickle.""" + +import sys +from pickle import UnpicklingError + +import cloudpickle +from aiida import orm + + +class Dict(orm.Dict): + @property + def value(self): + return self.get_dict() + + +class List(orm.List): + @property + def value(self): 
+ return self.get_list() + + +class PickledData(orm.Data): + """Data to represent a pickled value using cloudpickle.""" + + FILENAME = "value.pkl" # Class attribute to store the filename + + def __init__(self, value=None, **kwargs): + """Initialize a `PickledData` node instance. + + :param value: raw Python value to initialize the `PickledData` node from. + """ + super().__init__(**kwargs) + self.set_value(value) + + def __str__(self): + return f"{super().__str__()} : {self.get_value()}" + + @property + def value(self): + """Return the contents of this node. + + :return: The unpickled value. + """ + return self.get_value() + + @value.setter + def value(self, value): + self.set_value(value) + + def get_value(self): + """Return the contents of this node, unpickling the stored value. + + :return: The unpickled value. + """ + return self._get_value_from_file() + + def _get_value_from_file(self): + """Read the pickled value from file and return it.""" + try: + with self.base.repository.open(self.FILENAME, mode="rb") as f: + return cloudpickle.loads(f.read()) # Deserialize the value + except (UnpicklingError, ValueError) as e: + raise ImportError( + "Failed to load the pickled value. This may be due to an incompatible pickle protocol. " + "Please ensure that the correct environment and cloudpickle version are being used." + ) from e + except ModuleNotFoundError as e: + raise ImportError( + "Failed to load the pickled value. This may be due to a missing module. " + "Please ensure that the correct environment and cloudpickle version are being used." + ) from e + + def set_value(self, value): + """Set the contents of this node by pickling the provided value. + + :param value: The Python value to pickle and store. + """ + # Serialize the value and store it + serialized_value = cloudpickle.dumps(value) + self.base.repository.put_object_from_bytes(serialized_value, self.FILENAME) + + # Store relevant metadata + python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + self.base.attributes.set("python_version", python_version) + self.base.attributes.set("serializer_module", cloudpickle.__name__) + self.base.attributes.set("serializer_version", cloudpickle.__version__) + self.base.attributes.set("pickle_protocol", cloudpickle.DEFAULT_PROTOCOL) diff --git a/src/aiida_pythonjob/data/pickled_function.py b/src/aiida_pythonjob/data/pickled_function.py new file mode 100644 index 0000000..4cf7d89 --- /dev/null +++ b/src/aiida_pythonjob/data/pickled_function.py @@ -0,0 +1,145 @@ +import inspect +import textwrap +from typing import Any, Callable, Dict, _SpecialForm, get_type_hints + +from .pickled_data import PickledData + + +class PickledFunction(PickledData): + """Data class to represent a pickled Python function.""" + + def __init__(self, value=None, **kwargs): + """Initialize a PickledFunction node instance. 
+ + :param value: a Python function + """ + super().__init__(**kwargs) + if not callable(value): + raise ValueError("value must be a callable Python function") + self.set_value(value) + self.set_attribute(value) + + def __str__(self): + return f"PickledFunction<{self.base.attributes.get('function_name')}> pk={self.pk}" + + @property + def metadata(self): + """Return a dictionary of metadata.""" + return { + "name": self.base.attributes.get("name"), + "import_statements": self.base.attributes.get("import_statements"), + "source_code": self.base.attributes.get("source_code"), + "source_code_without_decorator": self.base.attributes.get("source_code_without_decorator"), + "type": "function", + "is_pickle": True, + } + + @classmethod + def build_callable(cls, func): + """Return the executor for this node.""" + import cloudpickle as pickle + + executor = { + "executor": pickle.dumps(func), + "type": "function", + "is_pickle": True, + } + executor.update(cls.inspect_function(func)) + return executor + + def set_attribute(self, value): + """Set the contents of this node by pickling the provided function. + + :param value: The Python function to pickle and store. + """ + # Serialize the function and extract metadata + serialized_data = self.inspect_function(value) + + # Store relevant metadata + self.base.attributes.set("name", serialized_data["name"]) + self.base.attributes.set("import_statements", serialized_data["import_statements"]) + self.base.attributes.set("source_code", serialized_data["source_code"]) + self.base.attributes.set( + "source_code_without_decorator", + serialized_data["source_code_without_decorator"], + ) + + @classmethod + def inspect_function(cls, func: Callable) -> Dict[str, Any]: + """Serialize a function for storage or transmission.""" + try: + # we need save the source code explicitly, because in the case of jupyter notebook, + # the source code is not saved in the pickle file + source_code = inspect.getsource(func) + # Split the source into lines for processing + source_code_lines = source_code.split("\n") + function_source_code = "\n".join(source_code_lines) + # Find the first line of the actual function definition + for i, line in enumerate(source_code_lines): + if line.strip().startswith("def "): + break + function_source_code_without_decorator = "\n".join(source_code_lines[i:]) + function_source_code_without_decorator = textwrap.dedent(function_source_code_without_decorator) + # we also need to include the necessary imports for the types used in the type hints. 
+ try: + required_imports = cls.get_required_imports(func) + except Exception as e: + required_imports = {} + print(f"Failed to get required imports for function {func.__name__}: {e}") + # Generate import statements + import_statements = "\n".join( + f"from {module} import {', '.join(types)}" for module, types in required_imports.items() + ) + except Exception as e: + print(f"Failed to inspect function {func.__name__}: {e}") + function_source_code = "" + function_source_code_without_decorator = "" + import_statements = "" + return { + "name": func.__name__, + "source_code": function_source_code, + "source_code_without_decorator": function_source_code_without_decorator, + "import_statements": import_statements, + } + + @classmethod + def get_required_imports(cls, func: Callable) -> Dict[str, set]: + """Retrieve type hints and the corresponding modules.""" + type_hints = get_type_hints(func) + imports = {} + + def add_imports(type_hint): + if isinstance(type_hint, _SpecialForm): # Handle special forms like Any, Union, Optional + module_name = "typing" + type_name = type_hint._name or str(type_hint) + elif hasattr(type_hint, "__origin__"): # This checks for higher-order types like List, Dict + module_name = type_hint.__module__ + type_name = getattr(type_hint, "_name", None) or getattr(type_hint.__origin__, "__name__", None) + for arg in getattr(type_hint, "__args__", []): + if arg is type(None): + continue + add_imports(arg) # Recursively add imports for each argument + elif hasattr(type_hint, "__module__"): + module_name = type_hint.__module__ + type_name = type_hint.__name__ + else: + return # If no module or origin, we can't import it, e.g., for literals + + if type_name is not None: + if module_name not in imports: + imports[module_name] = set() + imports[module_name].add(type_name) + + for _, type_hint in type_hints.items(): + add_imports(type_hint) + + return imports + + +def to_pickled_function(value): + """Convert a Python function to a `PickledFunction` instance.""" + return PickledFunction(value) + + +class PickledLocalFunction(PickledFunction): + """PickledFunction subclass for local functions.""" diff --git a/src/aiida_pythonjob/data/serializer.py b/src/aiida_pythonjob/data/serializer.py new file mode 100644 index 0000000..8064d05 --- /dev/null +++ b/src/aiida_pythonjob/data/serializer.py @@ -0,0 +1,122 @@ +import sys +from importlib.metadata import entry_points +from typing import Any + +from aiida import common, orm + +from aiida_pythonjob.config import load_config + +from .pickled_data import PickledData + + +def get_serializer_from_entry_points() -> dict: + """Retrieve the serializer from the entry points.""" + # import time + + # ts = time.time() + configs = load_config() + serializers = configs.get("serializers", {}) + excludes = serializers.get("excludes", []) + # Retrieve the entry points for 'aiida.data' and store them in a dictionary + eps = entry_points() + if sys.version_info >= (3, 10): + group = eps.select(group="aiida.data") + else: + group = eps.get("aiida.data", []) + eps = {} + for ep in group: + # split the entry point name by first ".", and check the last part + key = ep.name.split(".", 1)[-1] + # skip key without "." because it is not a module name for a data type + if "." 
not in key or key in excludes: + continue + eps.setdefault(key, []) + eps[key].append(ep) + + # print("Time to load entry points: ", time.time() - ts) + # check if there are duplicates + for key, value in eps.items(): + if len(value) > 1: + if key in serializers: + [ep for ep in value if ep.name == serializers[key]] + eps[key] = [ep for ep in value if ep.name == serializers[key]] + if not eps[key]: + raise ValueError(f"Entry point {serializers[key]} not found for {key}") + else: + msg = f"Duplicate entry points for {key}: {[ep.name for ep in value]}" + raise ValueError(msg) + return eps + + +eps = get_serializer_from_entry_points() + + +def serialize_to_aiida_nodes(inputs: dict | None = None) -> dict: + """Serialize the inputs to a dictionary of AiiDA data nodes. + + Args: + inputs (dict): The inputs to be serialized. + + Returns: + dict: The serialized inputs. + """ + new_inputs = {} + # save all kwargs to inputs port + for key, data in inputs.items(): + new_inputs[key] = general_serializer(data) + return new_inputs + + +def clean_dict_key(data): + """Replace "." with "__dot__" in the keys of a dictionary.""" + if isinstance(data, dict): + return {k.replace(".", "__dot__"): clean_dict_key(v) for k, v in data.items()} + return data + + +def general_serializer(data: Any, check_value=True) -> orm.Node: + """Serialize the data to an AiiDA data node.""" + if isinstance(data, orm.Data): + if check_value and not hasattr(data, "value"): + raise ValueError("Only AiiDA data Node with a value attribute is allowed.") + return data + elif isinstance(data, common.extendeddicts.AttributeDict): + # if the data is an AttributeDict, use it directly + return data + # if is string with syntax {{}}, this is a port will read data from ctx + elif isinstance(data, str) and data.startswith("{{") and data.endswith("}}"): + return data + # if data is a class instance, get its __module__ and class name as a string + # for example, an Atoms will have ase.atoms.Atoms + else: + data = clean_dict_key(data) + # try to get the serializer from the entry points + data_type = type(data) + ep_key = f"{data_type.__module__}.{data_type.__name__}" + # search for the key in the entry points + if ep_key in eps: + try: + new_node = eps[ep_key][0].load()(data) + except Exception as e: + raise ValueError(f"Error in serializing {ep_key}: {e}") + finally: + # try to save the node to da + try: + new_node.store() + return new_node + except Exception: + # try to serialize the value as a PickledData + try: + new_node = PickledData(data) + new_node.store() + return new_node + except Exception as e: + raise ValueError(f"Error in serializing {ep_key}: {e}") + else: + # try to serialize the data as a PickledData + try: + new_node = PickledData(data) + new_node.store() + return new_node + except Exception as e: + raise ValueError(f"Error in serializing {ep_key}: {e}") diff --git a/src/aiida_pythonjob/launch.py b/src/aiida_pythonjob/launch.py new file mode 100644 index 0000000..2c3bd03 --- /dev/null +++ b/src/aiida_pythonjob/launch.py @@ -0,0 +1,67 @@ +from typing import Any, Callable + +from aiida.orm import AbstractCode, Computer, FolderData, List, SinglefileData, Str + +from .data.pickled_function import PickledFunction +from .data.serializer import serialize_to_aiida_nodes +from .utils import get_or_create_code + + +def prepare_pythonjob_inputs( + function: Callable[..., Any], + function_inputs: dict[str, Any] | None = None, + function_outputs: dict[str, Any] | None = None, + code: AbstractCode | None = None, + command_info: dict[str, str] | 
None = None, + computer: str | Computer = "localhost", + metadata: dict[str, Any] | None = None, + upload_files: dict[str, str] = {}, + **kwargs: Any, +) -> dict[str, Any]: + """Prepare the inputs for PythonJob""" + import os + + # get the names kwargs for the PythonJob, which are the inputs before _wait + executor = PickledFunction.build_callable(function) + new_upload_files = {} + # change the string in the upload files to SingleFileData, or FolderData + for key, source in upload_files.items(): + # only alphanumeric and underscores are allowed in the key + # replace all "." with "_dot_" + new_key = key.replace(".", "_dot_") + if isinstance(source, str): + if os.path.isfile(source): + new_upload_files[new_key] = SinglefileData(file=source) + elif os.path.isdir(source): + new_upload_files[new_key] = FolderData(tree=source) + elif isinstance(source, (SinglefileData, FolderData)): + new_upload_files[new_key] = source + else: + raise ValueError(f"Invalid upload file type: {type(source)}, {source}") + # + if code is None: + command_info = command_info or {} + code = get_or_create_code(computer=computer, **command_info) + # get the source code of the function + function_name = executor["name"] + if executor.get("is_pickle", False): + function_source_code = executor["import_statements"] + "\n" + executor["source_code_without_decorator"] + else: + function_source_code = f"from {executor['module']} import {function_name}" + + # serialize the kwargs into AiiDA Data + function_inputs = function_inputs or {} + function_inputs = serialize_to_aiida_nodes(function_inputs) + # transfer the args to kwargs + inputs = { + "process_label": "PythonJob<{}>".format(function_name), + "function_source_code": Str(function_source_code), + "function_name": Str(function_name), + "code": code, + "function_inputs": function_inputs, + "upload_files": new_upload_files, + "function_outputs": List(function_outputs), + "metadata": metadata or {}, + **kwargs, + } + return inputs diff --git a/src/aiida_pythonjob/parsers/__init__.py b/src/aiida_pythonjob/parsers/__init__.py new file mode 100644 index 0000000..30b7784 --- /dev/null +++ b/src/aiida_pythonjob/parsers/__init__.py @@ -0,0 +1,3 @@ +from .pythonjob import PythonJobParser + +__all__ = ("PythonJobParser",) diff --git a/src/aiida_pythonjob/parsers/pythonjob.py b/src/aiida_pythonjob/parsers/pythonjob.py new file mode 100644 index 0000000..4b9f890 --- /dev/null +++ b/src/aiida_pythonjob/parsers/pythonjob.py @@ -0,0 +1,111 @@ +"""Parser for an `PythonJob` job.""" + +from aiida.engine import ExitCode +from aiida.parsers.parser import Parser + +from aiida_pythonjob.data.serializer import general_serializer + + +class PythonJobParser(Parser): + """Parser for an `PythonJob` job.""" + + def parse(self, **kwargs): + """Parse the contents of the output files stored in the `retrieved` output node. + + The function_outputs could be a namespce, e.g., + function_outputs=[ + {"identifier": "namespace", "name": "add_multiply"}, + {"name": "add_multiply.add"}, + {"name": "add_multiply.multiply"}, + {"name": "minus"}, + ] + """ + import pickle + + function_outputs = self.node.inputs.function_outputs.get_list() + if len(function_outputs) == 0: + function_outputs = [{"name": "result"}] + self.output_list = function_outputs + # first we remove nested outputs, e.g., "add_multiply.add" + top_level_output_list = [output for output in self.output_list if "." 
not in output["name"]] + exit_code = 0 + try: + with self.retrieved.base.repository.open("results.pickle", "rb") as handle: + results = pickle.load(handle) + if isinstance(results, tuple): + if len(top_level_output_list) != len(results): + self.exit_codes.ERROR_RESULT_OUTPUT_MISMATCH + for i in range(len(top_level_output_list)): + top_level_output_list[i]["value"] = self.serialize_output(results[i], top_level_output_list[i]) + elif isinstance(results, dict) and len(top_level_output_list) > 1: + # pop the exit code if it exists + exit_code = results.pop("exit_code", 0) + for output in top_level_output_list: + if output.get("required", False): + if output["name"] not in results: + self.exit_codes.ERROR_MISSING_OUTPUT + output["value"] = self.serialize_output(results.pop(output["name"]), output) + # if there are any remaining results, raise an warning + if results: + self.logger.warning( + f"Found extra results that are not included in the output: {results.keys()}" + ) + elif isinstance(results, dict) and len(top_level_output_list) == 1: + exit_code = results.pop("exit_code", 0) + # if output name in results, use it + if top_level_output_list[0]["name"] in results: + top_level_output_list[0]["value"] = self.serialize_output( + results[top_level_output_list[0]["name"]], + top_level_output_list[0], + ) + # otherwise, we assume the results is the output + else: + top_level_output_list[0]["value"] = self.serialize_output(results, top_level_output_list[0]) + elif len(top_level_output_list) == 1: + # otherwise, we assume the results is the output + top_level_output_list[0]["value"] = self.serialize_output(results, top_level_output_list[0]) + else: + raise ValueError("The number of results does not match the number of outputs.") + for output in top_level_output_list: + self.out(output["name"], output["value"]) + if exit_code: + if isinstance(exit_code, dict): + exit_code = ExitCode(exit_code["status"], exit_code["message"]) + elif isinstance(exit_code, int): + exit_code = ExitCode(exit_code) + return exit_code + except OSError: + return self.exit_codes.ERROR_READING_OUTPUT_FILE + except ValueError as exception: + self.logger.error(exception) + return self.exit_codes.ERROR_INVALID_OUTPUT + + def find_output(self, name): + """Find the output with the given name.""" + for output in self.output_list: + if output["name"] == name: + return output + return None + + def serialize_output(self, result, output): + """Serialize outputs.""" + + name = output["name"] + if output.get("identifier", "Any").upper() in ["NAMESPACE", "WORKGRAPH.NAMESPACE"]: + if isinstance(result, dict): + serialized_result = {} + for key, value in result.items(): + full_name = f"{name}.{key}" + full_name_output = self.find_output(full_name) + if full_name_output and full_name_output.get("identifier", "Any").upper() in [ + "NAMESPACE", + "WORKGRAPH.NAMESPACE", + ]: + serialized_result[key] = self.serialize_output(value, full_name_output) + else: + serialized_result[key] = general_serializer(value) + return serialized_result + else: + self.exit_codes.ERROR_INVALID_OUTPUT + else: + return general_serializer(result) diff --git a/src/aiida_pythonjob/utils.py b/src/aiida_pythonjob/utils.py new file mode 100644 index 0000000..5c170b9 --- /dev/null +++ b/src/aiida_pythonjob/utils.py @@ -0,0 +1,31 @@ +from typing import Optional + +from aiida.common.exceptions import NotExistent +from aiida.orm import Computer, InstalledCode, load_code, load_computer + + +def get_or_create_code( + label: str = "python3", + computer: Optional[str | Computer] = 
"localhost", + filepath_executable: Optional[str] = None, + prepend_text: str = "", +): + """Try to load code, create if not exit.""" + + try: + return load_code(f"{label}@{computer}") + except NotExistent: + description = f"Code on computer: {computer}" + computer = load_computer(computer) + filepath_executable = filepath_executable or label + code = InstalledCode( + computer=computer, + label=label, + description=description, + filepath_executable=filepath_executable, + default_calc_job_plugin="pythonjob.pythonjob", + prepend_text=prepend_text, + ) + + code.store() + return code diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..327591a --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,18 @@ +import pytest + +pytest_plugins = "aiida.tools.pytest_fixtures" + + +@pytest.fixture(scope="session", autouse=True) +def aiida_profile(aiida_config, aiida_profile_factory): + """Create and load a profile with RabbitMQ as broker.""" + with aiida_profile_factory(aiida_config, broker_backend="core.rabbitmq") as profile: + yield profile + + +@pytest.fixture +def fixture_localhost(aiida_localhost): + """Return a localhost `Computer`.""" + localhost = aiida_localhost + localhost.set_default_mpiprocs_per_machine(1) + return localhost diff --git a/tests/input.txt b/tests/input.txt new file mode 100644 index 0000000..d8263ee --- /dev/null +++ b/tests/input.txt @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/tests/inputs_folder/another_input.txt b/tests/inputs_folder/another_input.txt new file mode 100644 index 0000000..e440e5c --- /dev/null +++ b/tests/inputs_folder/another_input.txt @@ -0,0 +1 @@ +3 \ No newline at end of file diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 0000000..185d404 --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,23 @@ +from aiida_pythonjob import PickledFunction + + +def test_typing(): + """Test function with typing.""" + from typing import List + + from numpy import array + + def generate_structures( + strain_lst: List[float], + data: array, + data1: array, + strain_lst1: list, + ) -> list[array]: + pass + + modules = PickledFunction.get_required_imports(generate_structures) + assert modules == { + "typing": {"List"}, + "builtins": {"list", "float"}, + "numpy": {"array"}, + } diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pythonjob.py b/tests/test_pythonjob.py new file mode 100644 index 0000000..c1f4cb7 --- /dev/null +++ b/tests/test_pythonjob.py @@ -0,0 +1,225 @@ +import pytest +from aiida.engine import run_get_node +from aiida_pythonjob import PythonJob, prepare_pythonjob_inputs + + +def test_function_default_outputs(fixture_localhost): + """Test decorator.""" + + def add(x, y): + return x + y + + inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + ) + result, node = run_get_node(PythonJob, **inputs) + print("result: ", result) + + assert result["result"].value == 3 + assert node.process_label == "PythonJob" + + +def test_function_custom_outputs(fixture_localhost): + """Test decorator.""" + + def add(x, y): + return {"sum": x + y, "diff": x - y} + + inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + function_outputs=[ + {"name": "sum"}, + {"name": "diff"}, + ], + ) + result, node = run_get_node(PythonJob, **inputs) + + assert result["sum"].value == 3 + assert result["diff"].value == -1 + + +@pytest.mark.skip("Can not inspect the built-in function.") +def 
test_importable_function(fixture_localhost): + """Test importable function.""" + from operator import add + + inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + function_outputs=[ + {"name": "sum"}, + ], + ) + result, node = run_get_node(PythonJob, **inputs) + print("result: ", result) + assert result["sum"].value == 3 + + +def test_kwargs_inputs(fixture_localhost): + """Test function with kwargs.""" + + def add(x, y=1, **kwargs): + x += y + for value in kwargs.values(): + x += value + return x + + inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2, "a": 3, "b": 4}, + function_outputs=[ + {"name": "sum"}, + ], + ) + result, node = run_get_node(PythonJob, **inputs) + assert result["sum"].value == 10 + + +def test_namespace_output(fixture_localhost): + """Test function with namespace output and input.""" + + def myfunc(x, y): + add = {"order1": x + y, "order2": x * x + y * y} + return { + "add_multiply": {"add": add, "multiply": x * y}, + "minus": x - y, + } + + inputs = prepare_pythonjob_inputs( + myfunc, + function_inputs={"x": 1, "y": 2}, + function_outputs=[ + { + "name": "add_multiply", + "identifier": "namespace", + }, + { + "name": "add_multiply.add", + "identifier": "namespace", + }, + {"name": "minus"}, + ], + ) + result, node = run_get_node(PythonJob, **inputs) + print("result: ", result) + + assert result["add_multiply"]["add"]["order1"].value == 3 + assert result["add_multiply"]["add"]["order2"].value == 5 + assert result["add_multiply"]["multiply"].value == 2 + + +def test_parent_folder(fixture_localhost): + """Test function with parent folder.""" + + def add(x, y): + z = x + y + with open("result.txt", "w") as f: + f.write(str(z)) + return x + y + + def multiply(x, y): + with open("parent_folder/result.txt", "r") as f: + z = int(f.read()) + return x * y + z + + inputs1 = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + function_outputs=[{"name": "sum"}], + ) + result1, node1 = run_get_node(PythonJob, inputs=inputs1) + + inputs2 = prepare_pythonjob_inputs( + multiply, + function_inputs={"x": 1, "y": 2}, + function_outputs=[{"name": "product"}], + parent_folder=result1["remote_folder"], + ) + result2, node2 = run_get_node(PythonJob, inputs=inputs2) + + assert result2["product"].value == 5 + + +def test_upload_files(fixture_localhost): + """Test function with upload files.""" + + # create a temporary file "input.txt" in the current directory + with open("input.txt", "w") as f: + f.write("2") + + # create a temporary folder "inputs_folder" in the current directory + # and add a file "another_input.txt" in the folder + import os + + os.makedirs("inputs_folder", exist_ok=True) + with open("inputs_folder/another_input.txt", "w") as f: + f.write("3") + + def add(): + with open("input.txt", "r") as f: + a = int(f.read()) + with open("inputs_folder/another_input.txt", "r") as f: + b = int(f.read()) + return a + b + + # ------------------------- Submit the calculation ------------------- + # we need use full path to the file + input_file = os.path.abspath("input.txt") + input_folder = os.path.abspath("inputs_folder") + inputs = prepare_pythonjob_inputs( + add, + upload_files={ + "input.txt": input_file, + "inputs_folder": input_folder, + }, + ) + result, node = run_get_node(PythonJob, inputs=inputs) + + # wait=True) + assert result["result"].value == 5 + + +def test_retrieve_files(fixture_localhost): + """Test retrieve files.""" + + def add(x, y): + z = x + y + with open("result.txt", "w") as f: + f.write(str(z)) 
+ return x + y + + inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": 1, "y": 2}, + metadata={ + "options": { + "additional_retrieve_list": ["result.txt"], + } + }, + ) + result, node = run_get_node(PythonJob, inputs=inputs) + # ------------------------- Submit the calculation ------------------- + + assert "result.txt" in result["retrieved"].list_object_names() + + +def test_exit_code(fixture_localhost): + """Test function with exit code.""" + from numpy import array + + def add(x: array, y: array) -> array: + sum = x + y + if (sum < 0).any(): + exit_code = {"status": 410, "message": "Some elements are negative"} + return {"sum": sum, "exit_code": exit_code} + return {"sum": sum} + + inputs = prepare_pythonjob_inputs( + add, + function_inputs={"x": array([1, 1]), "y": array([1, -2])}, + ) + result, node = run_get_node(PythonJob, inputs=inputs) + assert node.exit_status == 410 + assert node.exit_message == "Some elements are negative"
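
One piece that the tests above do not exercise is the optional `pythonjob.json` configuration read by `load_config` in `src/aiida_pythonjob/config.py` and consumed by `get_serializer_from_entry_points` in `src/aiida_pythonjob/data/serializer.py`. The sketch below writes such a file from Python; the key layout (a `serializers` section with an `excludes` list and per-type preferred entry-point names) is inferred from the code in this change set, and the concrete type/entry-point names are hypothetical.

```python
# Sketch: create an optional pythonjob.json next to the AiiDA configuration.
# The structure is inferred from load_config() / get_serializer_from_entry_points()
# in this diff; the names used below are placeholders, not documented defaults.
import json

from aiida.manage.configuration.settings import AIIDA_CONFIG_FOLDER

config = {
    "serializers": {
        # Types to skip during automatic serialization. The key format matches
        # ep.name.split(".", 1)[-1], i.e. the part of the entry-point name after
        # the first dot (hypothetical example).
        "excludes": ["myplugin.some_type"],
        # When several 'aiida.data' entry points claim the same type, pick one
        # by its full entry-point name (hypothetical example).
        "ase.atoms.Atoms": "myplugin.ase.atoms.Atoms",
    }
}

with (AIIDA_CONFIG_FOLDER / "pythonjob.json").open("w") as handle:
    json.dump(config, handle, indent=2)
```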
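
The tests run everything on the localhost fixture. For completeness, here is a minimal sketch of targeting a named, pre-configured computer through the `command_info` path of `prepare_pythonjob_inputs`, which forwards those keys to `get_or_create_code`. The computer label `my-cluster`, the executable path, and the `prepend_text` line are placeholders and not values taken from this change set.

```python
# Sketch: run a function on an already-configured AiiDA computer, letting
# get_or_create_code() create an InstalledCode on it if one does not exist yet.
from aiida import load_profile
from aiida.engine import run_get_node

from aiida_pythonjob import PythonJob, prepare_pythonjob_inputs

load_profile()


def add(x, y):
    return x + y


inputs = prepare_pythonjob_inputs(
    add,
    function_inputs={"x": 1, "y": 2},
    computer="my-cluster",  # placeholder label of an existing computer
    command_info={
        "label": "python3",
        "filepath_executable": "/usr/bin/python3",
        "prepend_text": "module load python",  # hypothetical environment setup
    },
)
result, node = run_get_node(PythonJob, **inputs)
print(result["result"].value)
```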