From 5d53c9102d5791f7027939c06d1fc6f636283e12 Mon Sep 17 00:00:00 2001 From: Co Quach <43968221+daico007@users.noreply.github.com> Date: Thu, 16 May 2024 11:05:22 -0500 Subject: [PATCH] Consolidating HOOMD project and clean up (#261) * Merge hoomd4 subproject into main project Scrap original hoomd project directory in favor of using hoomd4 * remove irrelevant unit tests * Add comments block explaining HOOMD equil checks Also clean up commented out code. * remove lammps-UD --- .../aggregate_summary/aggregate_init.py | 3 +- .../bin/clean_by_engine.sh | 2 +- .../hoomd4_subproject/src/README.md | 7 - .../hoomd4_subproject/src/__init__.py | 1 - .../src/analysis/__init__.py | 5 - .../src/analysis/diffusion.py | 102 --- .../src/analysis/equilibration.py | 212 ------- .../hoomd4_subproject/src/analysis/rdf.py | 93 --- .../hoomd4_subproject/src/analysis/sampler.py | 237 ------- .../hoomd4_subproject/src/dashboard.py | 24 - .../hoomd4_subproject/src/engines/README.md | 1 - .../hoomd4_subproject/src/engines/__init__.py | 1 - reproducibility_project/init.py | 3 +- .../aggregate_summary/aggregate_init.py | 3 +- .../src/engine_input/lammps-UD/.keep | 0 .../hoomd/{project_rahman.py => project.py} | 0 .../src/engines/lammps-UD/.keep | 0 .../subproject-analysis.py | 2 +- .../lrc_shift_subproject/subproject-init.py | 3 +- reproducibility_project/project-analysis.py | 106 +--- .../spe_subproject/bin/clean_by_engine.sh | 2 +- .../src/engine_input/lammps-UD/.keep | 0 .../src/engine_input/lammps-UD/in.minimize | 49 -- .../src/engine_input/lammps-UD/in.spe | 56 -- .../src/engine_input/lammps-UD/submit.slurm | 60 -- .../src/engines/lammps-UD/.keep | 0 .../src/engines/lammps-UD/example-project.py | 92 --- .../src/engines/lammps-UD/main_project.py | 428 ------------- .../src/engines/lammps-UD/project.py | 216 ------- .../spe_subproject/subproject-init.py | 3 +- .../src/analysis/diffusion.py | 102 --- .../src/engine_input/lammps/.keep | 0 .../engine_input/lammps/UD_scripts/submit.pbs | 13 - .../lammps/UD_scripts/submit.slurm | 48 -- .../lammps/input_scripts/in.equilibration | 43 -- .../lammps/input_scripts/in.minimize | 43 -- .../lammps/input_scripts/in.production-npt | 43 -- .../lammps/input_scripts/in.production-nvt | 43 -- .../lammps/submission_scripts/__init__.py | 1 - .../src/engines/hoomd/__init__.py | 1 - .../src/engines/hoomd/project.py | 592 ------------------ .../src/engines/hoomd/project_rahman.py | 552 ---------------- .../src/engines/hoomd4/__init__.py | 0 .../engines/hoomd4}/environment.yml | 2 +- .../src/engines/hoomd4/project.py | 28 +- .../src/engines/lammps-UD/project.py | 216 ------- .../tests/test_diffusion.py | 21 - .../aggregate_summary/aggregate_init.py | 2 +- .../subproject-analysis.py | 2 +- .../subproject-init.py | 2 +- 50 files changed, 23 insertions(+), 3442 deletions(-) delete mode 100644 reproducibility_project/hoomd4_subproject/src/README.md delete mode 100644 reproducibility_project/hoomd4_subproject/src/__init__.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/analysis/__init__.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/analysis/diffusion.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/analysis/equilibration.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/analysis/rdf.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/analysis/sampler.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/dashboard.py delete mode 100644 reproducibility_project/hoomd4_subproject/src/engines/README.md delete mode 100644 reproducibility_project/hoomd4_subproject/src/engines/__init__.py delete mode 100644 reproducibility_project/lrc_shift_subproject/src/engine_input/lammps-UD/.keep rename reproducibility_project/lrc_shift_subproject/src/engines/hoomd/{project_rahman.py => project.py} (100%) delete mode 100644 reproducibility_project/lrc_shift_subproject/src/engines/lammps-UD/.keep delete mode 100644 reproducibility_project/spe_subproject/src/engine_input/lammps-UD/.keep delete mode 100644 reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.minimize delete mode 100644 reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.spe delete mode 100644 reproducibility_project/spe_subproject/src/engine_input/lammps-UD/submit.slurm delete mode 100644 reproducibility_project/spe_subproject/src/engines/lammps-UD/.keep delete mode 100644 reproducibility_project/spe_subproject/src/engines/lammps-UD/example-project.py delete mode 100644 reproducibility_project/spe_subproject/src/engines/lammps-UD/main_project.py delete mode 100644 reproducibility_project/spe_subproject/src/engines/lammps-UD/project.py delete mode 100644 reproducibility_project/src/analysis/diffusion.py delete mode 100644 reproducibility_project/src/engine_input/lammps/.keep delete mode 100644 reproducibility_project/src/engine_input/lammps/UD_scripts/submit.pbs delete mode 100644 reproducibility_project/src/engine_input/lammps/UD_scripts/submit.slurm delete mode 100644 reproducibility_project/src/engine_input/lammps/input_scripts/in.equilibration delete mode 100644 reproducibility_project/src/engine_input/lammps/input_scripts/in.minimize delete mode 100644 reproducibility_project/src/engine_input/lammps/input_scripts/in.production-npt delete mode 100644 reproducibility_project/src/engine_input/lammps/input_scripts/in.production-nvt delete mode 100644 reproducibility_project/src/engine_input/lammps/submission_scripts/__init__.py delete mode 100644 reproducibility_project/src/engines/hoomd/__init__.py delete mode 100644 reproducibility_project/src/engines/hoomd/project.py delete mode 100644 reproducibility_project/src/engines/hoomd/project_rahman.py rename reproducibility_project/{hoomd4_subproject => }/src/engines/hoomd4/__init__.py (100%) rename reproducibility_project/{hoomd4_subproject => src/engines/hoomd4}/environment.yml (97%) rename reproducibility_project/{hoomd4_subproject => }/src/engines/hoomd4/project.py (96%) delete mode 100644 reproducibility_project/src/engines/lammps-UD/project.py delete mode 100644 reproducibility_project/tests/test_diffusion.py diff --git a/reproducibility_project/aggregate_summary/aggregate_init.py b/reproducibility_project/aggregate_summary/aggregate_init.py index d189c421..3f017eec 100644 --- a/reproducibility_project/aggregate_summary/aggregate_init.py +++ b/reproducibility_project/aggregate_summary/aggregate_init.py @@ -31,9 +31,8 @@ def dict_product(dd): "gromacs", "hoomd", "lammps-VU", - # "lammps-UD", ] -md_engines = ["gromacs", "hoomd", "lammps-VU"] # , "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc"] forcefields = {} r_cuts = {} diff --git a/reproducibility_project/bin/clean_by_engine.sh b/reproducibility_project/bin/clean_by_engine.sh index 80b8fb3c..ab7a9810 100644 --- a/reproducibility_project/bin/clean_by_engine.sh +++ b/reproducibility_project/bin/clean_by_engine.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash echo Removing jobs from all engines but $1 -for ENGINE in cassandra mcccs gomc gromacs hoomd lammps-VU lammps-UD +for ENGINE in cassandra mcccs gomc gromacs hoomd lammps-VU do if [[ $ENGINE != $1 ]] then diff --git a/reproducibility_project/hoomd4_subproject/src/README.md b/reproducibility_project/hoomd4_subproject/src/README.md deleted file mode 100644 index 7ba75ec8..00000000 --- a/reproducibility_project/hoomd4_subproject/src/README.md +++ /dev/null @@ -1,7 +0,0 @@ -Location for storing the files that need to be accessed by the various statepoints. - -Contained within are: -* `signac` project file (`project.py`) -* Directory for input and configuration files for each simulation engine (`engine_input`) -* Directory for all starting molecules (`molecules`) -* Directory for all forcefield XML files for system parameters (`xmls`) diff --git a/reproducibility_project/hoomd4_subproject/src/__init__.py b/reproducibility_project/hoomd4_subproject/src/__init__.py deleted file mode 100644 index cfc31227..00000000 --- a/reproducibility_project/hoomd4_subproject/src/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Module containing all code related to reproducibility project.""" diff --git a/reproducibility_project/hoomd4_subproject/src/analysis/__init__.py b/reproducibility_project/hoomd4_subproject/src/analysis/__init__.py deleted file mode 100644 index 5e033209..00000000 --- a/reproducibility_project/hoomd4_subproject/src/analysis/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Analysis routines and helper methods.""" - -from reproducibility_project.src.analysis.equilibration import * -from reproducibility_project.src.analysis.rdf import gsd_rdf -from reproducibility_project.src.analysis.sampler import sample_job diff --git a/reproducibility_project/hoomd4_subproject/src/analysis/diffusion.py b/reproducibility_project/hoomd4_subproject/src/analysis/diffusion.py deleted file mode 100644 index 733aca30..00000000 --- a/reproducibility_project/hoomd4_subproject/src/analysis/diffusion.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Facilitate calculation of the diffusion coefficient from simulation data.""" - -import gsd.hoomd -import matplotlib.pyplot as plt -import numpy as np -import unyt as u -from freud.box import Box -from freud.msd import MSD -from scipy import stats - - -def gsd_msd(job, filename="trajectory.gsd", skip=2, stride=1, unwrapped=False): - """Compute the MSD and diffusion coefficient given a Signac Job object. - - The job folder is expected to contain the file "trajectory.gsd" with lengths - in nanometers. - This function is a convenience wrapper for freud's MSD module - https://freud.readthedocs.io/en/latest/modules/msd.html - The diffusion coefficient is calculated as the slope/6 of a linear fit to - the MSD following https://doi.org/10.1002/jcc.21939 - After execution, the files "msd.png" and "msd.txt" are created in the job - folder and the "diffusion_coefficent" (in nm^2/ps) is set in the job.doc. - - Parameters - ---------- - job : signac.contrib.job.Job - The Job object. - filename : str, default "trajectory.gsd" - The relative path (from the job directory) to the trajectory file to - analyze. - skip : int, default 2 - The number of frames from the trajectory to skip. - stride : int, default 1 - The step size between frames. - unwrapped : bool, default False - Whether the trajectory has unwrapped positions. By default, it is - assumed that the positions are not unwrapped and the box and image - information will be used to unwrap the positions. Otherwise only the - particle positions will be used. - - Returns - ------- - freud.msd.MSD - Computed MSD object - """ - msd, timesteps = _gsd_msd(job.fn(filename), skip, stride, unwrapped) - - m, b, r, p, std_err = stats.linregress(timesteps, msd.msd) - - fig, ax = plt.subplots() - ax.plot( - timesteps, - m * timesteps + b, - label=f"linear fit\ny = {m:.1e}x + {b:.1e}\n(r = {r:.3f})", - ) - ax.plot(timesteps, msd.msd, label="MSD") - ax.set_xlabel("$Time (ps)$") - ax.set_ylabel(r"$MSD (nm^{2})$") - ax.set_title("MSD") - - fig.savefig(job.fn("msd.png")) - - np.savetxt(job.fn("msd.txt"), msd.msd) - - # calculated according to - # https://doi.org/10.1002/jcc.21939 - # units nm^2/ps - job.doc["diffusion_coefficient"] = m / 6 - return msd - - -def _gsd_msd(gsdfile, skip, stride, unwrapped): - """Compute the MSD given a GSD file.""" - if not unwrapped: - with gsd.hoomd.open(gsdfile) as trajectory: - boxes = [] - images = [] - positions = [] - timesteps = [] - for frame in trajectory[skip::stride]: - boxes.append(frame.configuration.box) - images.append(frame.particles.image) - positions.append(frame.particles.position) - timesteps.append(frame.configuration.step) - - if not all(all(i == boxes[0]) for i in boxes): - raise ValueError( - "MSD calculation requires that the box size does not change." - ) - msd = MSD(Box.from_box(boxes[0])) - else: - with gsd.hoomd.open(gsdfile) as trajectory: - images = None - positions = [] - timesteps = [] - for frame in trajectory[skip::stride]: - positions.append(frame.particles.position) - timesteps.append(frame.configuration.step) - - msd = MSD() - msd.compute(positions, images=images) - return msd, np.array(timesteps) diff --git a/reproducibility_project/hoomd4_subproject/src/analysis/equilibration.py b/reproducibility_project/hoomd4_subproject/src/analysis/equilibration.py deleted file mode 100644 index 7fc7459c..00000000 --- a/reproducibility_project/hoomd4_subproject/src/analysis/equilibration.py +++ /dev/null @@ -1,212 +0,0 @@ -"""Timeseries and pyMBAR related methods.""" - -import pathlib -from typing import List - -import numpy as np -import numpy.typing as npt -import pandas as pd -from pymbar import timeseries -from signac.contrib.job import Job - - -def is_equilibrated( - a_t: npt.ArrayLike, - threshold_fraction: float = 0.8, - threshold_neff: int = 100, - strict: bool = False, - nskip: int = 1, -) -> List: - """Check if a dataset is equilibrated based on a fraction of equil data. - - Using `pymbar.timeseries` module, check if a timeseries dataset has enough - equilibrated data based on two threshold values. The threshold_fraction - value translates to the fraction of total data from the dataset 'a_t' that - can be thought of as being in the 'production' region. The threshold_neff - is the minimum amount of effectively uncorrelated samples to have in a_t to - consider it equilibrated. - - The `pymbar.timeseries` module returns the starting index of the - 'production' region from 'a_t'. The fraction of 'production' data is - then compared to the threshold value. If the fraction of 'production' data - is >= threshold fraction this will return a list of - [True, t0, g, Neff] and [False, None, None, None] otherwise. - - Parameters - ---------- - a_t : numpy.typing.Arraylike - 1-D time dependent data to check for equilibration. - threshold_fraction : float, optional, default=0.8 - Fraction of data expected to be equilibrated. - threshold_neff : int, optional, default=100 - Minimum amount of effectively correlated samples to consider a_t - 'equilibrated'. - strict : bool, optional, default=False - If strict require both threshold_fraction and threshold_neff to be - true to evaluate as 'equilibrated', else require either or. - nskip : int, optional, default=1 - Since the statistical inefficiency is computed for every time origin - in a call to timeseries.detectEquilibration, for larger datasets - (> few hundred), increasing nskip might speed this up, while - discarding more data. - """ - if threshold_fraction < 0.0 or threshold_fraction > 1.0: - raise ValueError( - f"Passed 'threshold_fraction' value: {threshold_fraction}, " - "expected value between 0.0-1.0." - ) - - threshold_neff = int(threshold_neff) - if threshold_neff < 1: - raise ValueError( - f"Passed 'threshold_neff' value: {threshold_neff}, expected value " - "1 or greater." - ) - - [t0, g, Neff] = timeseries.detectEquilibration(a_t, nskip=nskip) - frac_equilibrated = 1.0 - (t0 / np.shape(a_t)[0]) - - if strict: - if (frac_equilibrated >= threshold_fraction) and ( - Neff >= threshold_neff - ): - return [True, t0, g, Neff] - else: - return [False, None, None, None] - else: - if (frac_equilibrated >= threshold_fraction) or ( - Neff >= threshold_neff - ): - return [True, t0, g, Neff] - else: - return [False, None, None, None] - - -def trim_non_equilibrated( - a_t: npt.ArrayLike, - threshold_fraction: float = 0.75, - threshold_neff: int = 100, - strict: bool = False, - nskip: int = 1, -) -> List: - """Prune timeseries array to just the production data. - - Refer to equilibration.is_equilibrated for addtional information. - - This method returns a list of length 3, where list[0] is the trimmed array, - list[1] is the index of the original dataset where equilibration begins, - list[2] is the calculated statistical inefficiency, which can be used - when subsampling the data using `pymbar.timseries.subsampleCorrelatedData`, - list[3] is the number of effective uncorrelated data points. - - Refer to https://pymbar.readthedocs.io/en/master/timeseries.html for - additional information. - - Parameters - ---------- - a_t : numpy.typing.Arraylike - 1-D time dependent data to check for equilibration. - threshold_fraction : float, optional, default=0.75 - Fraction of data expected to be equilibrated. - threshold_neff : int, optional, default=100 - Minimum amount of uncorrelated samples. - strict : bool, optional, default=False - If strict require both threshold_fraction and threshold_neff to be - true to evaluate as 'equilibrated', else require either or. - nskip : int, optional, default=1 - Since the statistical inefficiency is computed for every time origin - in a call to timeseries.detectEquilibration, for larger datasets - (> few hundred), increasing nskip might speed this up, while - discarding more data. - """ - [truth, t0, g, Neff] = is_equilibrated( - a_t, - threshold_fraction=threshold_fraction, - threshold_neff=threshold_neff, - strict=strict, - nskip=nskip, - ) - if not truth: - raise ValueError( - f"Data with a threshold_fraction of {threshold_fraction} and " - f"threshold_neff {threshold_neff} is not equilibrated!" - ) - - return [a_t[t0:], t0, g, Neff] - - -def plot_job_property_with_t0( - job: Job, - filename: str, - property_name: str, - log_filename: str = "log.txt", - title: str = None, - vline_scale: float = 1.1, - threshold_fraction: float = 0.0, - threshold_neff: int = 1, - strict: bool = False, - overwrite: bool = False, - data_plt_kwargs: dict = None, - vline_plt_kwargs: dict = None, -) -> None: - """Plot data with a vertical line at beginning of equilibration for a specific job and property. - - Parameters - ---------- - job : signac.contrib.job.Job, required - The signac job to access the necessary data files. - filename : str, required - The name of the output image. - Only the name of the file and extension is expected, the location will - be within the job. - property_name : str, required - The name of the property to plot. - log_filename : str, default "log.txt" - The relative path (from the job directory) to the log file name to read. - title : str, optional, default = Property - Title of the plot - vline_scale : float, optional, default=1.1 - Scale the min and max components of the vertical line. - threshold_fraction : float, optional, default=0.0 - Fraction of data expected to be equilibrated. - threshold_neff : int, optional, default=1 - Minimum amount of uncorrelated samples. - strict : bool, optional, default=False - How strict should equilibration check be? - overwrite : bool, optional, default=False - Do not write to filename if a file already exists with the same name. - Set to True to overwrite exisiting files. - data_plt_kwargs : dict, optional, default={} - Pass in a dictionary of keyword arguments to plot the data. - vline_plt_kwargs : dict, optional, default={} - Pass in a dictionary of keyword arguments for the vertical line - denoting t0. - """ - from reproducibility_project.src.utils.plotting import ( - plot_data_with_t0_line, - ) - - fname = pathlib.Path(filename) - fname = fname.name - with open(job.fn(log_filename), "r") as f: - line1 = f.readline() - a_t = pd.read_csv( - f, delim_whitespace=True, names=line1.replace("#", "").split() - ) - - if data_plt_kwargs is None: - data_plt_kwargs = dict() - if vline_plt_kwargs is None: - vline_plt_kwargs = dict() - with job: - plot_data_with_t0_line( - filename=fname, - a_t=a_t[property_name].to_numpy(), - vline_scale=vline_scale, - title=title, - overwrite=overwrite, - threshold_fraction=threshold_fraction, - threshold_neff=threshold_neff, - data_plt_kwargs=data_plt_kwargs, - vline_plt_kwargs=vline_plt_kwargs, - ) diff --git a/reproducibility_project/hoomd4_subproject/src/analysis/rdf.py b/reproducibility_project/hoomd4_subproject/src/analysis/rdf.py deleted file mode 100644 index 78646190..00000000 --- a/reproducibility_project/hoomd4_subproject/src/analysis/rdf.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Facilitate the calculation of the RDF from simulation data.""" - -import freud -import gsd -import gsd.hoomd -import matplotlib.pyplot as plt -import numpy as np - - -def gsd_rdf( - job, - filename="trajectory.gsd", - frames=10, - stride=1, - bins=50, - r_min=0.5, - r_max=None, - ensemble=None, -): - """Compute the RDF given a Signac Job object. - - The job folder is expected to contain a trajectory file with lengths in - nanometers. - This function is a convenience wrapper for freud's RDF module - https://freud.readthedocs.io/en/latest/modules/density.html#freud.density.RDF - After execution, the files "rdf.png" and "rdf.txt" are created in the job - folder. - - Parameters - ---------- - job : signac.contrib.job.Job - The Job object. - filename : str, default "trajectory.gsd" - The relative path (from the job directory) to the trajectory file to be - analyzed. - frames : int, default 10 - The number of frames from the trajectory to average. Up to and always - uses the last frame. - stride : int, default 1 - The step size between frames - bins : int, default 50 - The number of bins in the RDF histogram. - r_min : float, default 0.5 - The minimum distance (in nm) to calculate the RDF. - r_max : float, default None - The maximum distance (in nm) to calculate the RDF. If None is provided, - the minimum box length times a factor of 0.45 will be used. - ensemble : str, default None - The ensemble the rdf data is being calculated from, will change output file names. - - Returns - ------- - freud.density.RDF - Computed RDF object - """ - if ensemble is None: - ensemble = "" - elif ensemble in ["npt", "nvt"]: - pass - else: - raise ValueError( - f"Ensemble provided: {ensemble}. Expected: 'None', 'npt', 'nvt'." - ) - rdf = _gsd_rdf(job.fn(filename), frames, stride, bins, r_min, r_max) - - fig, ax = plt.subplots() - ax.plot(rdf.bin_centers, rdf.rdf) - ax.set_xlabel("$r (nm)$") - ax.set_ylabel("$g(r)$") - ax.set_title("RDF") - - fig.savefig(job.fn(f"{ensemble}_rdf.png")) - - rdf_array = np.vstack((rdf.bin_centers, rdf.rdf)).T - np.savetxt(job.fn(f"{ensemble}_rdf.txt"), rdf_array) - return rdf - - -def _gsd_rdf(gsdfile, frames=10, stride=1, bins=50, r_min=0.5, r_max=None): - """Compute the RDF given a GSD file.""" - if r_max is None: - with gsd.hoomd.open(gsdfile) as trajectory: - box = trajectory[-1].configuration.box[:3] - r_max = min(box) * 0.45 - - rdf = freud.density.RDF(bins=bins, r_min=r_min, r_max=r_max) - - with gsd.hoomd.open(gsdfile) as trajectory: - start = -(frames * stride) + stride - 1 - for frame in trajectory[start::stride]: - rdf.compute(frame, reset=False) - - return rdf diff --git a/reproducibility_project/hoomd4_subproject/src/analysis/sampler.py b/reproducibility_project/hoomd4_subproject/src/analysis/sampler.py deleted file mode 100644 index 707e6879..00000000 --- a/reproducibility_project/hoomd4_subproject/src/analysis/sampler.py +++ /dev/null @@ -1,237 +0,0 @@ -"""Use the pymbar package to perform decorrelated equilibration sampling.""" - -from typing import List -from warnings import warn - -import numpy as np -import pandas as pd -import pymbar -import signac -from pymbar import timeseries - -from reproducibility_project.src.analysis.equilibration import is_equilibrated - - -def sample_job( - job: signac.contrib.job.Job, - ensemble: str, - filename: str = "log.txt", - variable: str = "potential_energy", - threshold_fraction: float = 0.75, - threshold_neff: int = 100, - strict: bool = False, - monte_carlo_override: bool = False, -): - """Use the timeseries module from pymbar to perform statistical sampling. - - The start, end and decorrleated step size of the production region are - added to the job document. - - Parameters - ---------- - job : signac.contrib.job.Job - The Job object. - ensemble : str - The ensemble of interest, affects the name of the sampled values in - the job.doc - filename : str, default "log.txt" - The relative path (from the job directory) to the log file to be - analyzed. - variable : str; default "potential_energy" - The variable to be used in sampling. - threshold_fraction : float, optional, default=0.75 - Fraction of data expected to be equilibrated. - threshold_neff : int, optional, default=100 - Minimum amount of uncorrelated samples to be considered equilibrated - strict : bool, default=False - If strict, require both threshold_fraction and threshold_neff to be - true to evaluate as 'equilibrated'. - monte_carlo_override : bool, optional, default=False - Consider the entire data set passed in as production data that is fully equilibrated. - """ - doc_name = f"{ensemble}/sampling_results" - - data = np.genfromtxt(job.fn(filename), names=True)[variable] - data_shape = data.shape - if not monte_carlo_override: - start, stop, step, Neff = _decorr_sampling( - data, - threshold_fraction=threshold_fraction, - threshold_neff=threshold_neff, - strict=strict, - ) - else: - start = 0 - stop = data_shape[0] - 1 - step = 1 - Neff = data_shape[0] - - try: - job.doc[doc_name] - except KeyError: - job.doc[doc_name] = {} - - if start is not None: - job.doc[doc_name][variable] = { - "start": start, - "stop": stop, - "step": step, - "Neff": Neff, - } - else: - msg = f""" - Property {variable} is not equilibrated. - Additional information: - JOB_ID: {job.id}\tPROPERTY: {variable}\tMOLECULE: {job.sp.molecule}\tENGINE: {job.sp.engine}\tTEMP: {job.sp.temperature}\tPRESS: {job.sp.pressure} - """ - warn(msg) - - -def get_subsampled_values( - job: signac.contrib.project.Job, - prop: str, - ensemble: str, - property_filename: str = "log-npt.txt", -) -> None: - """Return subsampled values based on the sampling results of sample_job. - - Using the results from `sample_job` in the job document, iterate through - the property file and return a numpy array of the subsampled data. - - This only writes out the subsampled values, the statistical averaging - will take place in the `analysis-project.py` file. - - Parameters - ---------- - job : signac.contrib.project.Job, required - The signac job to operate on. - prop : str, required - The property of interest to write out the subsampled data. - ensemble : str, required - The ensemble that the data was sampled from. - property_filename : str, optional, default="log-npt.txt" - The filename to sample the data from. - - Examples - -------- - >>> arr = write_subsampled_values(job, prop="potential_energy", - ensemble="npt", - property_filename="log-npt.txt") - >>> assert isinstance(arr, np.ndarray) - """ - if not isinstance(job, signac.contrib.project.Job): - raise TypeError( - f"Expected input 'job' of type signac.contrib.project.Job, was provided: {type(job)}" - ) - - if prop is None or prop == "": - raise ValueError( - f"Expected 'prop' to be a name of a property, was provided {prop}." - ) - - sampling_dict = job.doc[f"{ensemble}/sampling_results"][f"{prop}"] - start = sampling_dict["start"] - stop = sampling_dict["stop"] - step = sampling_dict["step"] - indices = [idx for idx in range(start, stop, step)] - - if not job.isfile(f"{property_filename}"): - raise FileNotFoundError( - f"File {property_filename} does not exist in {job}'s workspace." - ) - - with job: - with open(f"{property_filename}", "r") as f: - line1 = f.readline() - df = pd.read_csv( - f, delim_whitespace=True, names=line1.replace("#", "").split() - ) - if df.get(f"{prop}", None) is None: - return np.asarray([None] * len(indices)) - else: - return df[f"{prop}"].to_numpy()[indices] - - -def _decorr_sampling( - data, threshold_fraction=0.75, threshold_neff=100, strict=False -): - """Use the timeseries module from pymbar to perform statistical sampling. - - Parameters - ---------- - data : numpy.Array - 1-D time dependent data to check for equilibration - threshold_fraction : float, default=0.75 - Fraction of data expected to be equilibrated. - threshold_neff : int, default=100 - Minimum amount of uncorrelated samples to be considered equilibrated - strict : bool, default=False - If strict, require both threshold_fraction and threshold_neff to be - true to evaluate as 'equilibrated'. - """ - is_equil, prod_start, ineff, Neff = is_equilibrated( - data, - threshold_fraction=threshold_fraction, - threshold_neff=threshold_neff, - strict=strict, - nskip=1, - ) - if is_equil: - uncorr_indices = timeseries.subsampleCorrelatedData( - data[prod_start:], g=ineff, conservative=True - ) - return ( - uncorr_indices.start + prod_start, - uncorr_indices.stop + prod_start, - uncorr_indices.step, - Neff, - ) - else: - warn( - "Property does not have requisite threshold of production data " - "expected. More production data is needed, or the threshold needs " - "to be lowered. See project.src.analysis.equilibration.is_equilibrated" - " for more information." - ) - return (None, None, None, None) - - -def get_decorr_samples_using_max_t0( - job: signac.contrib.Project.Job, - ensemble: str, - property_filename: str, - prop: str, - threshold_fraction: float = 0.75, - threshold_neff: int = 100, - is_monte_carlo: bool = False, -) -> List[float]: - """Return the subsamples of data according to maximum t0.""" - t0 = job.doc.get(f"{ensemble}/max_t0") - - if t0 is None: - raise ValueError( - "Max t0 has not been calculated yet, refer to project-analysis.py" - ) - - with job: - with open(f"{property_filename}", "r") as f: - line1 = f.readline() - df = pd.read_csv( - f, delim_whitespace=True, names=line1.replace("#", "").split() - ) - a_t = df[f"{prop}"].to_numpy()[t0:] - if is_monte_carlo: - uncorr_indices = [val for val in range(t0, len(a_t))] - else: - try: - uncorr_indices = timeseries.subsampleCorrelatedData( - A_t=a_t, - conservative=True, - ) - except pymbar.utils.ParameterError as e: - warn( - f"This is a captured ParameterError from pymbar {e}, most likely due to zeroes being passed for the values. Returning the unsampled data" - ) - uncorr_indices = [i for i in range(t0, len(a_t))] - - return a_t[uncorr_indices] diff --git a/reproducibility_project/hoomd4_subproject/src/dashboard.py b/reproducibility_project/hoomd4_subproject/src/dashboard.py deleted file mode 100644 index 1bcdef7e..00000000 --- a/reproducibility_project/hoomd4_subproject/src/dashboard.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Dashboard for viewing data associated with jobs in a signac project.""" - -from signac_dashboard import Dashboard -from signac_dashboard.modules import ( - ImageViewer, - Notes, - StatepointList, - TextDisplay, - VideoViewer, -) - - -class PlotDashboard(Dashboard): - """Subclass of main Dashboard class to provide project-specific methods.""" - - def job_sorter(self, job): - """Sorts jobs based on provided statepoint parameters.""" - return [job.sp.simulation_engine] - - -if __name__ == "__main__": - modules = [] - modules.append(StatepointList()) - PlotDashboard(modules=modules).main() diff --git a/reproducibility_project/hoomd4_subproject/src/engines/README.md b/reproducibility_project/hoomd4_subproject/src/engines/README.md deleted file mode 100644 index ddd443f5..00000000 --- a/reproducibility_project/hoomd4_subproject/src/engines/README.md +++ /dev/null @@ -1 +0,0 @@ -We are using separate `signac` projects for each simulation engine. diff --git a/reproducibility_project/hoomd4_subproject/src/engines/__init__.py b/reproducibility_project/hoomd4_subproject/src/engines/__init__.py deleted file mode 100644 index c0b8efc3..00000000 --- a/reproducibility_project/hoomd4_subproject/src/engines/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Per-engine signac projects and additional information.""" diff --git a/reproducibility_project/init.py b/reproducibility_project/init.py index 743c2d6b..5bd8fb55 100644 --- a/reproducibility_project/init.py +++ b/reproducibility_project/init.py @@ -32,9 +32,8 @@ def dict_product(dd): "gromacs", "hoomd", "lammps-VU", - # "lammps-UD", ] -md_engines = ["gromacs", "hoomd", "lammps-VU"] # , "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc"] forcefields = {} r_cuts = {} diff --git a/reproducibility_project/lrc_shift_subproject/aggregate_summary/aggregate_init.py b/reproducibility_project/lrc_shift_subproject/aggregate_summary/aggregate_init.py index 38c6b453..fe581e6e 100644 --- a/reproducibility_project/lrc_shift_subproject/aggregate_summary/aggregate_init.py +++ b/reproducibility_project/lrc_shift_subproject/aggregate_summary/aggregate_init.py @@ -16,9 +16,8 @@ "gromacs", "hoomd", "lammps-VU", - "lammps-UD", ] -md_engines = ["gromacs", "hoomd", "lammps-VU", "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc"] forcefields = {} r_cuts = {} diff --git a/reproducibility_project/lrc_shift_subproject/src/engine_input/lammps-UD/.keep b/reproducibility_project/lrc_shift_subproject/src/engine_input/lammps-UD/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/reproducibility_project/lrc_shift_subproject/src/engines/hoomd/project_rahman.py b/reproducibility_project/lrc_shift_subproject/src/engines/hoomd/project.py similarity index 100% rename from reproducibility_project/lrc_shift_subproject/src/engines/hoomd/project_rahman.py rename to reproducibility_project/lrc_shift_subproject/src/engines/hoomd/project.py diff --git a/reproducibility_project/lrc_shift_subproject/src/engines/lammps-UD/.keep b/reproducibility_project/lrc_shift_subproject/src/engines/lammps-UD/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/reproducibility_project/lrc_shift_subproject/subproject-analysis.py b/reproducibility_project/lrc_shift_subproject/subproject-analysis.py index e245b0de..e6d3652d 100644 --- a/reproducibility_project/lrc_shift_subproject/subproject-analysis.py +++ b/reproducibility_project/lrc_shift_subproject/subproject-analysis.py @@ -36,7 +36,7 @@ def add_args(cls, parser): mc_engines = ("gomc", "mcccs", "cassandra") -md_engines = ("gromacs", "hoomd", "lammps-VU") # , "lammps-UD") +md_engines = ("gromacs", "hoomd", "lammps-VU") md_npt_props = [ "potential_energy", "kinetic_energy", diff --git a/reproducibility_project/lrc_shift_subproject/subproject-init.py b/reproducibility_project/lrc_shift_subproject/subproject-init.py index 91ba86f0..62abee15 100644 --- a/reproducibility_project/lrc_shift_subproject/subproject-init.py +++ b/reproducibility_project/lrc_shift_subproject/subproject-init.py @@ -17,9 +17,8 @@ "gromacs", "hoomd", "lammps-VU", - "lammps-UD", ] -md_engines = ["gromacs", "hoomd", "lammps-VU", "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc"] forcefields = {} r_cuts = {} diff --git a/reproducibility_project/project-analysis.py b/reproducibility_project/project-analysis.py index e245b0de..43bb820d 100644 --- a/reproducibility_project/project-analysis.py +++ b/reproducibility_project/project-analysis.py @@ -36,7 +36,7 @@ def add_args(cls, parser): mc_engines = ("gomc", "mcccs", "cassandra") -md_engines = ("gromacs", "hoomd", "lammps-VU") # , "lammps-UD") +md_engines = ("gromacs", "hoomd", "lammps-VU") md_npt_props = [ "potential_energy", "kinetic_energy", @@ -50,17 +50,6 @@ def add_args(cls, parser): "pressure", "density", ] -# md_nvt_props = [ -# "potential_energy", -# "kinetic_energy", -# "temperature", -# # "density", -# ] -# mc_nvt_props = [ -# "potential_energy", -# "temperature", -# # "density", -# ] # make a FlowGroup for all analysis operations, usually faster than the plotting methods @@ -560,99 +549,6 @@ def calc_aggregate_prop_statistics(*jobs): simulation_type="mc", ) -''' -@Project.operation -@Project.pre(lambda job: job.isfile("log-npt.txt")) -@Project.post(lambda job: job.isfile("density-npt.png")) -@flow.with_job -def plot_npt_prod_data_with_t0(job): - """Generate plots for production data with t0 as a vertical line.""" - import pandas as pd - - from reproducibility_project.src.analysis.equilibration import ( - plot_job_property_with_t0, - ) - - ensemble = "npt" - - # plot t0 - with open(job.fn("log-npt.txt"), "r") as f: - line1 = f.readline() - df = pd.read_csv( - f, delim_whitespace=True, names=line1.replace("#", "").split() - ) - """ - for prop in df.columns: - data_plt_kwarg = {"label": prop} - fname = str(prop) + "-" + ensemble + ".png" - plot_job_property_with_t0( - job, - filename=fname, - property_name=prop, - log_filename="log-npt.txt", - title=prop.upper(), - overwrite=True, - threshold_fraction=0.0, - threshold_neff=1, - strict=False, - vline_scale=1.1, - data_plt_kwargs=data_plt_kwarg, - ) - """ - data_plt_kwarg = {"label": "density"} - fname = "density" + "-" + ensemble + ".png" - plot_job_property_with_t0( - job, - filename=fname, - property_name="density", - log_filename="log-npt.txt", - title="Density", - overwrite=True, - threshold_fraction=0.0, - threshold_neff=1, - strict=False, - vline_scale=1.1, - data_plt_kwargs=data_plt_kwarg, - ) -''' - -''' -@Project.operation -# @Project.pre(lambda job: job.isfile("trajectory-nvt.gsd")) -@Project.pre(lambda job: job.isfile("log-nvt.txt")) -@Project.pre(lambda job: False) -@flow.with_job -def plot_nvt_prod_data_with_t0(job): - """Generate plots for production data with t0 as a vertical line.""" - import pandas as pd - - from reproducibility_project.src.analysis.equilibration import ( - plot_job_property_with_t0, - ) - - ensemble = "nvt" - - # plot t0 - with open(job.fn("log-nvt.txt", 'r') as f: - line1 = f.readline() - df = pd.read_csv(f, delim_whitespace=True, names=line1.replace('#', '').split()) - for prop in df.columns: - data_plt_kwarg = {"label": prop} - fname = str(prop) + "-" + ensemble + ".png" - plot_job_property_with_t0( - job, - filename=fname, - property_name=prop, - log_filename="log-nvt.txt", - title=prop.upper(), - overwrite=True, - threshold_fraction=0.0, - threshold_neff=1, - strict=False, - vline_scale=1.1, - data_plt_kwargs=data_plt_kwarg, - ) -''' if __name__ == "__main__": pr = Project() diff --git a/reproducibility_project/spe_subproject/bin/clean_by_engine.sh b/reproducibility_project/spe_subproject/bin/clean_by_engine.sh index 80b8fb3c..ab7a9810 100644 --- a/reproducibility_project/spe_subproject/bin/clean_by_engine.sh +++ b/reproducibility_project/spe_subproject/bin/clean_by_engine.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash echo Removing jobs from all engines but $1 -for ENGINE in cassandra mcccs gomc gromacs hoomd lammps-VU lammps-UD +for ENGINE in cassandra mcccs gomc gromacs hoomd lammps-VU do if [[ $ENGINE != $1 ]] then diff --git a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/.keep b/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.minimize b/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.minimize deleted file mode 100644 index 25f05414..00000000 --- a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.minimize +++ /dev/null @@ -1,49 +0,0 @@ -# Lammps script to read in mBuild topologies and perform energy minimization -# Initialization -units real -boundary p p p -atom_style full - -# Assume ff info is included in data file -pair_style lj/cut/coul/cut ${rcut} #modify cutoff from job.sp.r_cut -bond_style harmonic -angle_style harmonic -dihedral_style opls -read_data box.lammps -#fix pppm -pair_modify shift ${pass_shift} tail ${pass_lrc} -#fix shake -neighbor 2.5 bin #skin cutoff - -timestep 0.1 -thermo 2000 -thermo_style custom step temp press pe epair emol ke etotal density - -variable tsample equal ${T} #kelvin # modify from job.sp.temperature -variable psample equal ${P}/101.325 #kPa to atm job.sp.pressure -velocity all create ${T} ${seed}+1 #temperature in kelvin -# ________________________________________________________________________________________ - - - - - - -# Minimize energy -minimize 1e-4 1e-4 1000 1000 -fix integrator all nve/limit 0.1 -fix zeromom all momentum 100 linear 1 1 1 -run 10000 -unfix integrator -minimize 1e-4 1e-4 1000 1000 -fix integrator all nve -run 10000 -unfix integrator -minimize 1e-4 1e-4 1000 1000 -fix integrator all nvt temp ${tsample} ${tsample} 100.0 -run 50000 -timestep ${tstep} -run 50000 -unfix integrator -reset_timestep 0 #reset timestep so the first minimize restart file has a consistent name -write_restart minimized.restart-* diff --git a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.spe b/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.spe deleted file mode 100644 index aafae8d5..00000000 --- a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/in.spe +++ /dev/null @@ -1,56 +0,0 @@ -# Lammps script to read in mBuild topologies and perform energy minimization -# Initialization -units real -boundary p p p -atom_style full - -# Assume ff info is included in data file -pair_style lj/cut/coul/cut ${rcut} # read in from job.sp.r_cut -bond_style harmonic -angle_style harmonic -dihedral_style opls -read_data box.lammps -#fix pppm -pair_modify shift ${pass_shift} tail ${pass_lrc} #TODO: Look to make sure this shift is okay -#fix shake -neighbor 2.5 bin #skin cutoff - -timestep ${tstep} -variable runtime equal 0 -thermo 1000 -thermo_style custom etotal pe evdwl ecoul epair ebond eangle edihed etail elong - -variable tsample equal ${T} #kelvin, modified by job.sp.temperature -variable psample equal ${P}/101.325 #kPa to atm modified by job.sp.pressure -# ________________________________________________________________________________________ - - - - - -# Production -variable startstep equal step -variable e equal etotal -variable p equal pe -variable v equal evdwl -variable c equal ecoul -variable pa equal epair -variable b equal ebond -variable a equal eangle -variable d equal edihed -variable t equal etail -variable k equal elong -#variable filename string "prlog-npt.txt" -#fix output_data all print 1000 "$e $p $v $c ${pa} $b $a $d $t $k" append ${filename} title "#total pe vdw coul pair bonds angle dihedral tail kspace" -dump traj1 all dcd 10000 prod-npt.dcd -fix integrator all npt temp ${tsample} ${tsample} 100.0 iso ${psample} ${psample} 1000.0 pchain 10 -restart 5000 equilibrated-npt.restart-0 equilibrated-npt.restart-1 -run 0 upto - - -print "#step total pe vdw coul pair bonds angle dihedral tail kspace" file prlog-npt.txt -print "${startstep} $e $p $v $c ${pa} $b $a $d $t $k" append prlog-npt.txt - -unfix intiegrator -uindump traj1 -reset_timestep 0 diff --git a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/submit.slurm b/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/submit.slurm deleted file mode 100644 index e5f04539..00000000 --- a/reproducibility_project/spe_subproject/src/engine_input/lammps-UD/submit.slurm +++ /dev/null @@ -1,60 +0,0 @@ -#! /bin/bash -#SBATCH --job-name="rpd-uamethane" -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --time=00:9:59 -#SBATCH --export=ALL -#SBATCH --mail-user=zijiewu@udel.edu -#SBATCH --mail-type=BEGIN,END,FAIL -# SBATCH --requeue -# SBATCH --partition=_workgroup_ -#SBATCH --mem=8G - -export VALET_PATH=$VALET_PATH:$WORKDIR/udsw/valet/etc -vpkg_require jlab-lammps - - -infile=$1 -seed=$2 -T=$3 -P=$4 -rcut=$5 -tstep=$6 -pass_lrc=$7 -pass_shift=$8 - - -OPENMPI_VERSION='unknown' -OMPI_INFO="$(which ompi_info)" -if [ $? -eq 0 -a -n "$OMPI_INFO" ]; then - OPENMPI_VERSION="$(${OMPI_INFO} --version | egrep 'v[0-9]+' | sed 's/^.*v//')" -fi -OPENMPI_FLAGS="--display-map --mca btl ^tcp" -if [ "${WANT_CPU_AFFINITY:-NO}" = "YES" ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --bind-to core" -fi -if [ "${WANT_NPROC:-0}" -gt 0 ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --np ${WANT_NPROC} --map-by node" -fi -if [ "${SHOW_MPI_DEBUGGING:-NO}" = "YES" ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --debug-devel --debug-daemons --display-devel-map --display-devel-allocation --mca mca_verbose 1 --mca coll_base_verbose 1 --mca ras_base_verbose 1 --mca ras_gridengine_debug 1 --mca ras_gridengine_verbose 1 --mca btl_base_verbose 1 --mca mtl_base_verbose 1 --mca plm_base_verbose 1 --mca pls_rsh_debug 1" - if [ "${WANT_CPU_AFFINITY:-NO}" = "YES" ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --report-bindings" - fi -fi - -if [ $((1)) ] - then - echo "GridEngine parameters:" - echo " mpirun = "`which mpirun` - echo " nhosts = $NHOSTS" - echo " nproc = $NSLOTS" - echo " executable = $MY_EXE" - echo " Open MPI vers = $OPENMPI_VERSION" - echo " MPI flags = $OPENMPI_FLAGS" - echo "-- begin OPENMPI run --" - mpirun ${OPENMPI_FLAGS} lmp -in $infile -var seed $seed -var T $T -var P $P -var rcut $rcut -var tstep $tstep -var pass_lrc $pass_lrc -var pass_shift $pass_shift - echo $SLURM_JOB_ID $SLURM_SUBMIT_DIR >> ~/job-id-dirs.txt - rc=$? - echo "-- end OPENMPI run --" -fi diff --git a/reproducibility_project/spe_subproject/src/engines/lammps-UD/.keep b/reproducibility_project/spe_subproject/src/engines/lammps-UD/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/reproducibility_project/spe_subproject/src/engines/lammps-UD/example-project.py b/reproducibility_project/spe_subproject/src/engines/lammps-UD/example-project.py deleted file mode 100644 index 8e205ee8..00000000 --- a/reproducibility_project/spe_subproject/src/engines/lammps-UD/example-project.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Setup for signac, signac-flow, signac-dashboard for this study.""" - -import os -import pathlib - -import flow -import numpy as np -from flow import environments - - -class Project(flow.FlowProject): - """Subclass of FlowProject to provide custom methods and attributes.""" - - def __init__(self): - super().__init__() - - -# ____________________________________________________________________________ -"""Setting progress label""" - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "MYENGINENAME") -def CreatedEngineInput(job): - """Check if the .json molecule topology was converted to engine input.""" - return job.isfile("MYENGINEINPUTS") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "MYENGINENAME") -def OutputThermoData(job): - """Check if the engine loaded the input files and wrote out thermo data.""" - return job.isfile("NAMEOFMYTHERMOOUTPUT") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "MYENGINENAME") -def FinishedSPECalc(job): - """Check if the log-spe.txt has been created.""" - return job.isfile("log-spe.txt") - - -# _____________________________________________________________________ -"""Setting up workflow operation""" - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "MYENGINENAME") -@Project.post(CreatedEngineInput) -@flow.with_job -def LoadSystemSnapShot(job): - """Create initial configurations of the system statepoint.""" - import mbuild as mb - - pr = Project() - snapshot_directory = ( - pathlib.Path(pr.root_directory()) / "src" / "system_snapshots" - ) - molecule = job.sp.molecule - molecule_filename = molecule + ".json" - box = mb.load(str(snapshot_directory / molecule_filename)) - # Apply forcefield and write out engine input files - # __________________________________________________ - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "MYENGINENAME") -@Project.pre(CreatedEngineInput) -@Project.post(OutputThermoData) -@flow.with_job -def CalculateEnergy(job): - """Load onto a cluster and output the point energy for the snapshot.""" - # __________________________________________________ - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "MYENGINENAME") -@Project.pre(OutputThermoData) -@Project.post(FinishedSPECalc) -@flow.with_job -@flow.cmd -def FormatTextFile(job): - """Take the output from the simulation engine and convert it to log-spe.txt for data comparisons. - - See README.md for spe_subproject for formatting information. - """ - # __________________________________________________ - - -if __name__ == "__main__": - pr = Project() - pr.main() diff --git a/reproducibility_project/spe_subproject/src/engines/lammps-UD/main_project.py b/reproducibility_project/spe_subproject/src/engines/lammps-UD/main_project.py deleted file mode 100644 index 2dfa6ce5..00000000 --- a/reproducibility_project/spe_subproject/src/engines/lammps-UD/main_project.py +++ /dev/null @@ -1,428 +0,0 @@ -"""Setup for signac, signac-flow, signac-dashboard for this study.""" - -import os -import pathlib -import sys - -import flow -import numpy as np -from flow import environments - - -class Project(flow.FlowProject): - """Subclass of FlowProject to provide custom methods and attributes.""" - - def __init__(self): - super().__init__() - current_path = pathlib.Path(os.getcwd()).absolute() - self.data_dir = current_path.parents[1] / "data" - self.ff_fn = self.data_dir / "forcefield.xml" - - -# ____________________________________________________________________________ -"""Setting progress label""" - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_created_box(job): - """Check if the lammps simulation box has been created for the job.""" - return job.isfile("box.lammps") and job.isfile("box.json") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_copy_files(job): - """Check if the submission scripts have been copied over for the job.""" - return job.isfile("submit.slurm") and job.isfile("in.minimize") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_minimized_equilibrated_nvt(job): - """Check if the lammps minimization step has run for the job.""" - return job.isfile("minimized.restart-0") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@flow.with_job -def lammps_equilibrated_npt(job): - """Check if the lammps equilibration step has run and passed is_equilibrated for the job.""" - import pathlib - - import numpy as np - - from reproducibility_project.src.analysis.equilibration import ( - is_equilibrated, - ) - - p = pathlib.Path(".") - list_of_filenames = list(p.glob("eqlog*.txt")) - # grab the filename with the largest number - counter = -1 - latest_eqdata = False - for file in list_of_filenames: - step = int(file.name[5:].split(".")[0]) - if step > counter: - counter = step - latest_eqdata = file - if latest_eqdata: - try: - data = np.genfromtxt(latest_eqdata.name) - check_equil = [ - is_equilibrated(data[:, 1])[0], - is_equilibrated(data[:, 2])[0], - is_equilibrated(data[:, 4])[0], - is_equilibrated(data[:, 6])[0], - ] - except (IOError, IndexError) as e: - check_equil = [False, False, False, False] - else: - check_equil = [False, False, False, False] - return job.isfile("equilibrated-npt.restart-0") and np.all(check_equil) - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_production_npt(job): - """Check if the lammps production step has run for the job.""" - return job.isfile("production-npt.restart-0") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_production_nvt(job): - """Check if the lammps nvt production step has run for the job.""" - return job.isfile("production-nvt.restart-0") - - -# sample job to get decorrelated data - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_reformatted_data(job): - """Check if lammps has output density information for the job.""" - return job.isfile("log-npt.txt") and job.isfile("log-nvt.txt") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def lammps_created_gsd(job): - """Check if the mdtraj has converted the production to a gsd trajectory for the job.""" - return job.isfile("trajectory-npt.gsd") - - -# _____________________________________________________________________ -"""Setting up workflow operation""" - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.post(lammps_created_box) -@flow.with_job -def built_lammps(job): - """Create initial configurations of the system statepoint.""" - import foyer - from mbuild.formats.lammpsdata import write_lammpsdata - - from reproducibility_project.src.molecules.system_builder import ( - construct_system, - ) - from reproducibility_project.src.utils.forcefields import load_ff - - system = construct_system(job.sp)[0] - parmed_structure = system.to_parmed() - ff = load_ff(job.sp.forcefield_name) - system.save( - "box.json" - ) # save the compound as a json object for reading back in to mbuild - typed_box = ff.apply(parmed_structure) - typed_box.save( - "box.top" - ) # save to gromacs topology for later conversions in mdtraj - typed_box.save( - "box.gro" - ) # save to gromacs topology for later conversions in mdtraj - write_lammpsdata( - typed_box, - "box.lammps", - atom_style="full", - unit_style="real", - mins=[system.get_boundingbox().vectors[0]], - maxs=[system.get_boundingbox().vectors[1]], - use_rb_torsions=True, - ) # write out a lammps topology - return - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_created_box) -@Project.post(lammps_copy_files) -@flow.with_job -@flow.cmd -def lammps_cp_files(job): - """Copy over run files for lammps and the PBS scheduler.""" - lmps_submit_path = ( - "../../src/engine_input/lammps/input_scripts/submit.slurm" - ) - lmps_run_path = "../../src/engine_input/lammps/input_scripts/in.*" - msg = f"cp {lmps_submit_path} {lmps_run_path} ./" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_copy_files) -@Project.post(lammps_minimized_equilibrated_nvt) -@flow.with_job -@flow.cmd -def lammps_em_nvt(job): - """Run energy minimization and nvt ensemble.""" - if job.sp.molecule == "ethanolAA": - tstep = 1.0 - else: - tstep = 2.0 - in_script_name = "in.minimize" - if job.sp.molecule in ["waterSPCE", "ethanolAA"]: - modify_engine_scripts( - in_script_name, 7, "pair_style lj/cut/coul/long ${rcut}\n" - ) - add_pppm(in_script_name, 12) - if job.sp.molecule in ["ethanolAA"]: - add_14coul(in_script_name, 28) - r_cut = job.sp.r_cut * 10 - modify_submit_scripts(in_script_name, job.id) - pass_lrc = "yes" - pass_shift = "no" - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica+1} {job.sp.temperature} {job.sp.pressure} {r_cut} {tstep} {pass_lrc} {pass_shift}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_minimized_equilibrated_nvt) -@Project.post(lammps_equilibrated_npt) -@flow.with_job -@flow.cmd -def lammps_equil_npt(job): - """Run npt ensemble equilibration.""" - if job.sp.molecule == "ethanolAA": - tstep = 1.0 - else: - tstep = 2.0 - in_script_name = "in.equilibration" - modify_submit_scripts(in_script_name, job.id) - if job.sp.molecule in ["waterSPCE"]: - add_shake(in_script_name, 14) - if job.sp.molecule in ["waterSPCE", "ethanolAA"]: - add_pppm(in_script_name, 12) - modify_engine_scripts( - in_script_name, 7, "pair_style lj/cut/coul/long ${rcut}\n" - ) - if job.sp.molecule in ["ethanolAA"]: - add_14coul(in_script_name, 28) - r_cut = job.sp.r_cut * 10 - pass_lrc = "yes" - pass_shift = "no" - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica+1} {job.sp.temperature} {job.sp.pressure} {r_cut} {tstep} {pass_lrc} {pass_shift}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_equilibrated_npt) -@Project.post(lammps_production_npt) -@flow.with_job -@flow.cmd -def lammps_prod_npt(job): - """Run npt ensemble production.""" - if job.sp.molecule == "ethanolAA": - tstep = 1.0 - else: - tstep = 2.0 - in_script_name = "in.production-npt" - modify_submit_scripts(in_script_name, job.id) - if job.sp.molecule in ["waterSPCE"]: - add_shake(in_script_name, 14) - if job.sp.molecule in ["waterSPCE", "ethanolAA"]: - add_pppm(in_script_name, 12) - modify_engine_scripts( - in_script_name, 7, "pair_style lj/cut/coul/long ${rcut}\n" - ) - if job.sp.molecule in ["ethanolAA"]: - add_14coul(in_script_name, 28) - r_cut = job.sp.r_cut * 10 - pass_lrc = "yes" - pass_shift = "no" - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica+1} {job.sp.temperature} {job.sp.pressure} {r_cut} {tstep} {pass_lrc} {pass_shift}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_production_npt) -@Project.post(lammps_production_nvt) -@flow.with_job -@flow.cmd -def lammps_prod_nvt(job): - """Run npt ensemble production.""" - if job.sp.molecule == "ethanolAA": - tstep = 1.0 - else: - tstep = 2.0 - in_script_name = "in.production-nvt" - modify_submit_scripts(in_script_name, job.id) - if job.sp.molecule in ["waterSPCE"]: - add_shake(in_script_name, 14) - if job.sp.molecule in ["waterSPCE", "ethanolAA"]: - add_pppm(in_script_name, 12) - modify_engine_scripts( - in_script_name, 7, "pair_style lj/cut/coul/long ${rcut}\n" - ) - if job.sp.molecule in ["ethanolAA"]: - add_14coul(in_script_name, 28) - r_cut = job.sp.r_cut * 10 - pass_lrc = "yes" - pass_shift = "no" - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica+1} {job.sp.temperature} {job.sp.pressure} {r_cut} {tstep} {pass_lrc} {pass_shift}" - - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_production_nvt) -@Project.post(lammps_reformatted_data) -@flow.with_job -def lammps_reformat_data(job): - """Take data from thermo.txt and reformat to log.txt with correct units. - - Lammps units real: energy=kcal/mol, temp=K, press=atm, density=g/cm^3, step=2fs - Project units: energy=kJ/mol, temp=K, press=MPa, density=amu/nm^3, step=1ps - """ - import numpy as np - import pandas as pd - - attr_list = ["step", "temp", "press", "etotal", "pe", "ke", "density"] - df_npt_in = pd.read_csv( - job.ws + "/prlog-npt.txt", - delimiter=" ", - comment="#", - header=0, - names=attr_list, - ) - df_nvt_in = pd.read_csv( - job.ws + "/prlog-nvt.txt", - delimiter=" ", - comment="#", - header=0, - names=attr_list, - ) - new_titles_list = [ - "timestep", - "potential_energy", - "kinetic_energy", - "pressure", - "temperature", - "density", - ] - attr_list = ["step", "pe", "ke", "press", "temp", "density"] - KCAL_TO_KJ = 4.184 # kcal to kj - ATM_TO_MPA = 0.101325 # atm to mpa - df_npt_in["pe"] = df_npt_in["pe"] * KCAL_TO_KJ - df_nvt_in["pe"] = df_nvt_in["pe"] * KCAL_TO_KJ - df_npt_in["ke"] = df_npt_in["ke"] * KCAL_TO_KJ - df_nvt_in["ke"] = df_nvt_in["ke"] * KCAL_TO_KJ - df_npt_in["press"] = df_npt_in["press"] * ATM_TO_MPA - df_nvt_in["press"] = df_nvt_in["press"] * ATM_TO_MPA - df_npt_out = df_npt_in[attr_list] - df_nvt_out = df_nvt_in[attr_list] - df_npt_out.columns = new_titles_list - df_nvt_out.columns = new_titles_list - df_npt_out.to_csv("log-npt.txt", header=True, index=False, sep=" ") - df_nvt_out.to_csv("log-nvt.txt", header=True, index=False, sep=" ") - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(lammps_reformatted_data) -@Project.post(lammps_created_gsd) -@flow.with_job -def lammps_create_gsd(job): - """Create an rdf from the gsd file using Freud analysis scripts.""" - # Create rdf data from the production run - import mdtraj as md - - traj = md.load("prod-npt.dcd", top="box.gro") - traj.save("trajectory-npt.gsd") - traj = md.load("prod-nvt.dcd", top="box.gro") - traj.save("trajectory-nvt.gsd") - return - - -def add_shake(filename, ln): - """Add shake.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = "fix fix_shake all shake 0.00001 20 1000 b 1 a 1\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def add_14coul(filename, ln): - """Add 14 coul.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = "special_bonds lj/coul 0 0 0.5\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def add_pppm(filename, ln): - """Add pppm.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = "kspace_style pppm 0.00001\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def remove_shake(filename): - """Remove pppm.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[27] = "\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def modify_submit_scripts(filename, jobid, cores=8): - """Modify the submission scripts to include the job and simulation type in the header.""" - with open("submit.slurm", "r") as f: - lines = f.readlines() - lines[1] = "#SBATCH --job-name={}-{}\n".format(filename[3:], jobid[0:4]) - with open("submit.slurm", "w") as f: - f.writelines(lines) - return - - -def modify_engine_scripts(filename, ln, info): - """Modify any line of any scripts.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = info - with open(filename, "w") as f: - f.writelines(lines) - return - - -if __name__ == "__main__": - pr = Project() - pr.main() diff --git a/reproducibility_project/spe_subproject/src/engines/lammps-UD/project.py b/reproducibility_project/spe_subproject/src/engines/lammps-UD/project.py deleted file mode 100644 index d14d1bb5..00000000 --- a/reproducibility_project/spe_subproject/src/engines/lammps-UD/project.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Setup for signac, signac-flow, signac-dashboard for this study.""" - -import os -import pathlib - -import flow -import foyer -import numpy as np -from flow import environments -from mbuild.formats.lammpsdata import write_lammpsdata - -from reproducibility_project.src.utils.forcefields import load_ff - - -class Project(flow.FlowProject): - """Subclass of FlowProject to provide custom methods and attributes.""" - - def __init__(self): - super().__init__() - - -# ____________________________________________________________________________ -"""Setting progress label""" - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def CreatedEngineInput(job): - """Check if the .json molecule topology was converted to engine input.""" - return job.isfile("in.spe") and job.isfile("box.lammps") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def OutputThermoData(job): - """Check if the engine loaded the input files and wrote out thermo data.""" - return job.isfile("prlog-npt.txt") - - -@Project.label -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -def FinishedSPECalc(job): - """Check if the log-spe.txt has been created.""" - return job.isfile("log-spe.txt") - - -# _____________________________________________________________________ -"""Setting up workflow operation""" - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.post(CreatedEngineInput) -@flow.with_job -@flow.cmd -def LoadSystemSnapShot(job): - """Create initial configurations of the system statepoint.""" - import mbuild as mb - - pr = Project() - snapshot_directory = ( - pathlib.Path(pr.root_directory()) / "src" / "system_snapshots" - ) - molecule = job.sp.molecule - molecule_filename = molecule + ".json" - box = mb.load(str(snapshot_directory / molecule_filename)) - parmed_box = box.to_parmed() - ff = load_ff(job.sp.forcefield_name) - # Apply forcefield and write out engine input files - # __________________________________________________ - typed_box = ff.apply(parmed_box) - write_lammpsdata( - typed_box, - "box.lammps", - atom_style="full", - unit_style="real", - mins=[box.get_boundingbox().vectors[0]], - maxs=[box.get_boundingbox().vectors[1]], - use_rb_torsions=True, - ) # write out a lammps topology - lmps_submit_path = "../../src/engine_input/lammps-UD/submit.slurm" - lmps_run_path = "../../src/engine_input/lammps-UD/in.*" - msg = f"cp {lmps_submit_path} {lmps_run_path} ./" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(CreatedEngineInput) -@Project.post(OutputThermoData) -@flow.with_job -@flow.cmd -def CalculateEnergy(job): - """Load onto a cluster and output the point energy for the snapshot.""" - # __________________________________________________ - tstep = 2.0 - in_script_name = "in.spe" - modify_submit_scripts(in_script_name, job.id) - if job.sp.molecule in ["waterSPCE"]: - add_shake(in_script_name, 14) - if job.sp.molecule in ["waterSPCE", "ethanolAA"]: - add_pppm(in_script_name, 12) - modify_engine_scripts( - in_script_name, 7, "pair_style lj/cut/coul/long ${rcut}\n" - ) - if job.sp.molecule in ["ethanolAA"]: - add_14coul(in_script_name, 28) - r_cut = job.sp.r_cut * 10 - pass_lrc = "yes" - pass_shift = "no" - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica+1} {job.sp.temperature} {job.sp.pressure} {r_cut} {tstep} {pass_lrc} {pass_shift}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.engine == "lammps-UD") -@Project.pre(OutputThermoData) -@Project.post(FinishedSPECalc) -@flow.with_job -@flow.cmd -def FormatTextFile(job): - """Take the output from the simulation engine and convert it to log-spe.txt for data comparisons. - - See README.md for spe_subproject for formatting information. - """ - # __________________________________________________ - import numpy as np - import pandas as pd - - attr_list = [ - "step", - "total", - "pot", - "vdw", - "coul", - "pair", - "bonds", - "angles", - "dihedrals", - "tail", - "kspace", - ] - df_npt_in = pd.read_csv( - job.ws + "/prlog-npt.txt", delimiter=" ", comment="#", names=attr_list - ) - print(df_npt_in) - KCAL_TO_KJ = 4.184 # kcal to kj - ATM_TO_MPA = 0.101325 # atm to mpa - for attr in attr_list: - df_npt_in[attr] = df_npt_in[attr] * KCAL_TO_KJ - df_npt_in.to_csv("log-spe.txt", header=True, index=False, sep=" ") - - -def add_shake(filename, ln): - """Add SHAKE calc to input file.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = "fix fix_shake all shake 0.00001 20 1000 b 1 a 1\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def add_14coul(filename, ln): - """Add 14coul to input file.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = "special_bonds lj/coul 0 0 0.5\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def add_pppm(filename, ln): - """Add PPPM to input file.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = "kspace_style pppm 0.00001\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def remove_shake(filename): - """Remove SHAKE from input file.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[27] = "\n" - with open(filename, "w") as f: - f.writelines(lines) - return - - -def modify_submit_scripts(filename, jobid, cores=8): - """Modify the submission scripts to include the job and simulation type in the header.""" - with open("submit.slurm", "r") as f: - lines = f.readlines() - lines[1] = "#SBATCH --job-name={}-{}\n".format(filename[3:], jobid[0:4]) - with open("submit.slurm", "w") as f: - f.writelines(lines) - return - - -def modify_engine_scripts(filename, ln, info): - """Modify any line of any scripts.""" - with open(filename, "r") as f: - lines = f.readlines() - lines[ln] = info - with open(filename, "w") as f: - f.writelines(lines) - return - - -if __name__ == "__main__": - pr = Project() - pr.main() diff --git a/reproducibility_project/spe_subproject/subproject-init.py b/reproducibility_project/spe_subproject/subproject-init.py index 2c0386ee..d304105f 100644 --- a/reproducibility_project/spe_subproject/subproject-init.py +++ b/reproducibility_project/spe_subproject/subproject-init.py @@ -25,10 +25,9 @@ def dict_product(dd): "gromacs", "hoomd", "lammps-VU", - "lammps-UD", "mcccs_lammps_foyer", ] -md_engines = ["gromacs", "hoomd", "lammps-VU", "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc", "mcccs_lammps_foyer"] forcefields = {} r_cuts = {} diff --git a/reproducibility_project/src/analysis/diffusion.py b/reproducibility_project/src/analysis/diffusion.py deleted file mode 100644 index 733aca30..00000000 --- a/reproducibility_project/src/analysis/diffusion.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Facilitate calculation of the diffusion coefficient from simulation data.""" - -import gsd.hoomd -import matplotlib.pyplot as plt -import numpy as np -import unyt as u -from freud.box import Box -from freud.msd import MSD -from scipy import stats - - -def gsd_msd(job, filename="trajectory.gsd", skip=2, stride=1, unwrapped=False): - """Compute the MSD and diffusion coefficient given a Signac Job object. - - The job folder is expected to contain the file "trajectory.gsd" with lengths - in nanometers. - This function is a convenience wrapper for freud's MSD module - https://freud.readthedocs.io/en/latest/modules/msd.html - The diffusion coefficient is calculated as the slope/6 of a linear fit to - the MSD following https://doi.org/10.1002/jcc.21939 - After execution, the files "msd.png" and "msd.txt" are created in the job - folder and the "diffusion_coefficent" (in nm^2/ps) is set in the job.doc. - - Parameters - ---------- - job : signac.contrib.job.Job - The Job object. - filename : str, default "trajectory.gsd" - The relative path (from the job directory) to the trajectory file to - analyze. - skip : int, default 2 - The number of frames from the trajectory to skip. - stride : int, default 1 - The step size between frames. - unwrapped : bool, default False - Whether the trajectory has unwrapped positions. By default, it is - assumed that the positions are not unwrapped and the box and image - information will be used to unwrap the positions. Otherwise only the - particle positions will be used. - - Returns - ------- - freud.msd.MSD - Computed MSD object - """ - msd, timesteps = _gsd_msd(job.fn(filename), skip, stride, unwrapped) - - m, b, r, p, std_err = stats.linregress(timesteps, msd.msd) - - fig, ax = plt.subplots() - ax.plot( - timesteps, - m * timesteps + b, - label=f"linear fit\ny = {m:.1e}x + {b:.1e}\n(r = {r:.3f})", - ) - ax.plot(timesteps, msd.msd, label="MSD") - ax.set_xlabel("$Time (ps)$") - ax.set_ylabel(r"$MSD (nm^{2})$") - ax.set_title("MSD") - - fig.savefig(job.fn("msd.png")) - - np.savetxt(job.fn("msd.txt"), msd.msd) - - # calculated according to - # https://doi.org/10.1002/jcc.21939 - # units nm^2/ps - job.doc["diffusion_coefficient"] = m / 6 - return msd - - -def _gsd_msd(gsdfile, skip, stride, unwrapped): - """Compute the MSD given a GSD file.""" - if not unwrapped: - with gsd.hoomd.open(gsdfile) as trajectory: - boxes = [] - images = [] - positions = [] - timesteps = [] - for frame in trajectory[skip::stride]: - boxes.append(frame.configuration.box) - images.append(frame.particles.image) - positions.append(frame.particles.position) - timesteps.append(frame.configuration.step) - - if not all(all(i == boxes[0]) for i in boxes): - raise ValueError( - "MSD calculation requires that the box size does not change." - ) - msd = MSD(Box.from_box(boxes[0])) - else: - with gsd.hoomd.open(gsdfile) as trajectory: - images = None - positions = [] - timesteps = [] - for frame in trajectory[skip::stride]: - positions.append(frame.particles.position) - timesteps.append(frame.configuration.step) - - msd = MSD() - msd.compute(positions, images=images) - return msd, np.array(timesteps) diff --git a/reproducibility_project/src/engine_input/lammps/.keep b/reproducibility_project/src/engine_input/lammps/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/reproducibility_project/src/engine_input/lammps/UD_scripts/submit.pbs b/reproducibility_project/src/engine_input/lammps/UD_scripts/submit.pbs deleted file mode 100644 index 66129ecb..00000000 --- a/reproducibility_project/src/engine_input/lammps/UD_scripts/submit.pbs +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -l -#PBS -N FILL_IN_NAME -#PBS -m ae -#PBS -M nicholas.craven.76@gmail.com -#PBS -l nodes=1:ppn=16 -#PBS -l walltime=96:00:00 -#PBS -q standard -#PBS -j oe -cd $PBS_O_WORKDIR -module load lammps - -mpirun -np 8 lmp < in.minimize #change name so proper script is run for each molecule -echo $PBS_JOBID $PBS_O_WORKDIR >> ~/job-id-dirs.txt diff --git a/reproducibility_project/src/engine_input/lammps/UD_scripts/submit.slurm b/reproducibility_project/src/engine_input/lammps/UD_scripts/submit.slurm deleted file mode 100644 index a932def0..00000000 --- a/reproducibility_project/src/engine_input/lammps/UD_scripts/submit.slurm +++ /dev/null @@ -1,48 +0,0 @@ -#! /bin/bash -#SBATCH --job-name="rpd-uamethane" -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --time=11:59:59 -#SBATCH --export=ALL -#SBATCH --mail-user=zijiewu@udel.edu -#SBATCH --mail-type=BEGIN,END,FAIL -#SBATCH --requeue -# SBATCH --partition=_workgroup_ -#SBATCH --mem=8G - -export VALET_PATH=$VALET_PATH:$WORKDIR/udsw/valet/etc -vpkg_require jlab-lammps - -OPENMPI_VERSION='unknown' -OMPI_INFO="$(which ompi_info)" -if [ $? -eq 0 -a -n "$OMPI_INFO" ]; then - OPENMPI_VERSION="$(${OMPI_INFO} --version | egrep 'v[0-9]+' | sed 's/^.*v//')" -fi -OPENMPI_FLAGS="--display-map --mca btl ^tcp" -if [ "${WANT_CPU_AFFINITY:-NO}" = "YES" ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --bind-to core" -fi -if [ "${WANT_NPROC:-0}" -gt 0 ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --np ${WANT_NPROC} --map-by node" -fi -if [ "${SHOW_MPI_DEBUGGING:-NO}" = "YES" ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --debug-devel --debug-daemons --display-devel-map --display-devel-allocation --mca mca_verbose 1 --mca coll_base_verbose 1 --mca ras_base_verbose 1 --mca ras_gridengine_debug 1 --mca ras_gridengine_verbose 1 --mca btl_base_verbose 1 --mca mtl_base_verbose 1 --mca plm_base_verbose 1 --mca pls_rsh_debug 1" - if [ "${WANT_CPU_AFFINITY:-NO}" = "YES" ]; then - OPENMPI_FLAGS="${OPENMPI_FLAGS} --report-bindings" - fi -fi - -if [ $((1)) ] - then - echo "GridEngine parameters:" - echo " mpirun = "`which mpirun` - echo " nhosts = $NHOSTS" - echo " nproc = $NSLOTS" - echo " executable = $MY_EXE" - echo " Open MPI vers = $OPENMPI_VERSION" - echo " MPI flags = $OPENMPI_FLAGS" - echo "-- begin OPENMPI run --" - mpirun ${OPENMPI_FLAGS} lmp -in in.minimize - rc=$? - echo "-- end OPENMPI run --" -fi diff --git a/reproducibility_project/src/engine_input/lammps/input_scripts/in.equilibration b/reproducibility_project/src/engine_input/lammps/input_scripts/in.equilibration deleted file mode 100644 index 2fc54e91..00000000 --- a/reproducibility_project/src/engine_input/lammps/input_scripts/in.equilibration +++ /dev/null @@ -1,43 +0,0 @@ -# Lammps script to read in mBuild topologies and perform energy minimization -# Initialization -units real -boundary p p p -atom_style full - -# Assume ff info is included in data file -pair_style lj/cut/coul/cut ${rcut} #modify line from job.sp.r_cut -bond_style harmonic -angle_style harmonic -dihedral_style opls -read_restart minimized.restart-* - -pair_modify shift no - -neighbor 2.5 bin #skin cutoff - -timestep ${tstep} -thermo 1000 -thermo_style custom step temp press pe epair emol ke etotal density - -variable tsample equal ${T} #kelvin modify from job.sp.temperature -variable psample equal ${P}/101.325 #kPa to atm from job.sp.pressure -velocity all create ${T} ${seed} #kelvin modify from job.sp.replicate -# ________________________________________________________________________________________ - -# Equilibrate npt box -variable startstep equal step -variable t equal temp -variable pressure equal press -variable e equal etotal -variable p equal pe -variable k equal ke -variable d equal density -variable startstep0 equal ${startstep} -variable filename string eqlog${startstep0}.txt -fix output_data all print 1000 "${startstep} ${t} ${pressure} $e $p $k $d" file ${filename} title "step temp press etotal pe ke density" -fix integrator all npt temp ${tsample} ${tsample} 100.0 iso ${psample} ${psample} 1000.0 pchain 10 -fix zeromom all momentum 100 linear 1 1 1 rescale -run 1000000 -unfix integrator -write_restart minimized.restart-* -write_restart equilibrated-npt.restart diff --git a/reproducibility_project/src/engine_input/lammps/input_scripts/in.minimize b/reproducibility_project/src/engine_input/lammps/input_scripts/in.minimize deleted file mode 100644 index 7e519219..00000000 --- a/reproducibility_project/src/engine_input/lammps/input_scripts/in.minimize +++ /dev/null @@ -1,43 +0,0 @@ -# Lammps script to read in mBuild topologies and perform energy minimization -# Initialization -units real -boundary p p p -atom_style full - -# Assume ff info is included in data file -pair_style lj/cut/coul/cut ${rcut} #modify cutoff from job.sp.r_cut -bond_style harmonic -angle_style harmonic -dihedral_style opls -read_data box.lammps -pair_modify shift no - -neighbor 2.5 bin #skin cutoff - -timestep 0.1 -thermo 2000 -thermo_style custom step temp press pe epair emol ke etotal density - -variable tsample equal ${T} #kelvin # modify from job.sp.temperature -variable psample equal ${P}/101.325 #kPa to atm job.sp.pressure -velocity all create 140.0 ${seed}+1 #temperature in kelvin -# ________________________________________________________________________________________ - -# Minimize energy -minimize 1e-4 1e-4 1000 1000 -fix integrator all nve/limit 0.1 -fix zeromom all momentum 100 linear 1 1 1 -run 10000 -unfix integrator -minimize 1e-4 1e-4 1000 1000 -fix integrator all nve -run 10000 -unfix integrator -minimize 1e-4 1e-4 1000 1000 -fix integrator all nvt temp ${tsample} ${tsample} 100.0 -run 50000 -timestep ${tstep} -run 50000 -unfix integrator -reset_timestep 0 #reset timestep so the first minimize restart file has a consistent name -write_restart minimized.restart-* diff --git a/reproducibility_project/src/engine_input/lammps/input_scripts/in.production-npt b/reproducibility_project/src/engine_input/lammps/input_scripts/in.production-npt deleted file mode 100644 index e25fce18..00000000 --- a/reproducibility_project/src/engine_input/lammps/input_scripts/in.production-npt +++ /dev/null @@ -1,43 +0,0 @@ -# Lammps script to read in mBuild topologies and perform energy minimization -# Initialization -units real -boundary p p p -atom_style full - -# Assume ff info is included in data file -pair_style lj/cut/coul/cut ${rcut} # read in from job.sp.r_cut -bond_style none -angle_style none -dihedral_style none -read_restart equilibrated-npt.restart - -pair_modify shift no #TODO: Look to make sure this shift is okay - -neighbor 2.5 bin #skin cutoff - -timestep ${tstep} -variable runtime equal 5e6/dt -thermo 1000 -thermo_style custom step temp press pe epair emol ke etotal density - -variable tsample equal ${T} #kelvin, modified by job.sp.temperature -variable psample equal ${P}/101.325 #kPa to atm modified by job.sp.pressure -# ________________________________________________________________________________________ - -# Production -variable startstep equal step -variable t equal temp -variable pressure equal press -variable e equal etotal -variable p equal pe -variable k equal ke -variable d equal density -variable filename string "prlog-npt.txt" -fix output_data all print 1000 "${startstep} ${t} ${pressure} $e $p $k $d" file ${filename} title "step temp press etotal pe ke density" -dump traj1 all xtc 10000 prod-npt.xtc -fix integrator all npt temp ${tsample} ${tsample} 100.0 iso ${psample} ${psample} 1000.0 pchain 10 -fix zeromom all momentum 100 linear 1 1 1 rescale -run ${runtime} -unfix integrator -write_restart production-npt.restart -undump traj1 diff --git a/reproducibility_project/src/engine_input/lammps/input_scripts/in.production-nvt b/reproducibility_project/src/engine_input/lammps/input_scripts/in.production-nvt deleted file mode 100644 index 99cbc267..00000000 --- a/reproducibility_project/src/engine_input/lammps/input_scripts/in.production-nvt +++ /dev/null @@ -1,43 +0,0 @@ -# Lammps script to read in mBuild topologies and perform energy minimization -# Initialization -units real -boundary p p p -atom_style full - -# Assume ff info is included in data file -pair_style lj/cut/coul/cut ${rcut} # read in from job.sp.r_cut -bond_style none -angle_style none -dihedral_style none -read_restart production-npt.restart - -pair_modify shift no #TODO: Look to make sure this shift is okay - -neighbor 2.5 bin #skin cutoff - -timestep ${tstep} -variable runtime equal 5e6/dt -thermo 1000 -thermo_style custom step temp press pe epair emol ke etotal density - -variable tsample equal ${T} #kelvin, modified by job.sp.temperature -variable psample equal ${P}/101.325 #kPa to atm modified by job.sp.pressure -# ________________________________________________________________________________________ - -# Production -variable startstep equal step -variable t equal temp -variable pressure equal press -variable e equal etotal -variable p equal pe -variable k equal ke -variable d equal density -variable filename string "prlog-nvt.txt" -fix output_data all print 1000 "${startstep} ${t} ${pressure} $e $p $k $d" file ${filename} title "step temp press etotal pe ke density" -dump traj1 all xtc 10000 prod-nvt.xtc -fix integrator all nvt temp ${tsample} ${tsample} 100.0 -fix zeromom all momentum 100 linear 1 1 1 rescale -run 3000000 -unfix integrator -write_restart production-nvt.restart -undump traj1 diff --git a/reproducibility_project/src/engine_input/lammps/submission_scripts/__init__.py b/reproducibility_project/src/engine_input/lammps/submission_scripts/__init__.py deleted file mode 100644 index 9945239e..00000000 --- a/reproducibility_project/src/engine_input/lammps/submission_scripts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""LAMMPS submission scripts.""" diff --git a/reproducibility_project/src/engines/hoomd/__init__.py b/reproducibility_project/src/engines/hoomd/__init__.py deleted file mode 100644 index 970b12a5..00000000 --- a/reproducibility_project/src/engines/hoomd/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""HOOMD project module.""" diff --git a/reproducibility_project/src/engines/hoomd/project.py b/reproducibility_project/src/engines/hoomd/project.py deleted file mode 100644 index 56b860a0..00000000 --- a/reproducibility_project/src/engines/hoomd/project.py +++ /dev/null @@ -1,592 +0,0 @@ -"""Setup for signac, signac-flow, signac-dashboard for this study.""" - -import datetime -import os -import pathlib - -import flow -import numpy as np -from flow import FlowProject -from flow.environment import DefaultSlurmEnvironment - -rigid_molecules = ["waterSPCE", "benzeneUA"] - -header_dict = { - "Simulationtimestep": "timestep", - "Simulationtps": "tps", - "mdcomputeThermodynamicQuantitieskinetic_energy": "kinetic_energy", - "mdcomputeThermodynamicQuantitiespotential_energy": "potential_energy", - "mdcomputeThermodynamicQuantitiespressure": "pressure", - "mdcomputeThermodynamicQuantitieskinetic_temperature": "temperature", - "mdcomputeThermodynamicQuantitiesvolume": "volume", - "mdpairLJenergy": "LJ_energy", - "mdpairEwaldenergy": "Ewald_energy", - "mdlong_rangepppmCoulombenergy": "Coulomb_energy", - "mdspecial_pairLJenergy": "LJ_1-4_energy", - "mdspecial_pairCoulombenergy": "Coulomb_1-4_energy", - "mdbondHarmonicenergy": "bond_energy", - "mdangleHarmonicenergy": "angle_energy", - "mddihedralOPLSenergy": "dihedral_energy", - "Statustime_remaining": "time_remaining", -} - - -class Project(FlowProject): - """Subclass of FlowProject to provide custom methods and attributes.""" - - def __init__(self): - super().__init__() - current_path = pathlib.Path(os.getcwd()).absolute() - self.data_dir = current_path.parents[1] / "data" - self.ff_fn = self.data_dir / "forcefield.xml" - - -class Fry(DefaultSlurmEnvironment): - """Subclass of DefaultSlurmEnvironment for BSU's Fry cluster.""" - - hostname_pattern = "fry.boisestate.edu" - template = "fry.sh" - - @classmethod - def add_args(cls, parser): - """Add command line arguments to the submit call.""" - parser.add_argument( - "--partition", - default="batch", - help="Specify the partition to submit to.", - ) - parser.add_argument("--nodelist", help="Specify the node to submit to.") - - -# The MOSDEF_PYTHON environment variable is set by running -# echo "export MOSDEF_PYTHON=$(which python)" >> ~/.bashrc -# with the mosdef-study38 conda env active -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.post(lambda j: j.doc.get("shrink_finished")) -def run_shrink(job): - """Initialize volume for simulation with HOOMD-blue.""" - run_hoomd(job, "shrink") - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("npt_eq")) -@Project.post(lambda j: j.doc.get("nvt_finished")) -def run_nvt(job): - """Run an NVT simulation with HOOMD-blue.""" - run_hoomd(job, "nvt", restart=job.isfile("trajectory-nvt.gsd")) - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("shrink_finished")) -@Project.post(lambda j: j.doc.get("npt_finished")) -def run_npt(job): - """Run an NPT simulation with HOOMD-blue.""" - run_hoomd(job, "npt", restart=job.isfile("trajectory-npt.gsd")) - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("npt_finished")) -@Project.post(lambda j: j.doc.get("npt_eq")) -def check_equilibration_npt(job): - """Check the equilibration of the NPT simulation.""" - job.doc.npt_finished = check_equilibration(job, "npt", "volume") - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("nvt_finished")) -@Project.post(lambda j: j.doc.get("nvt_eq")) -def check_equilibration_nvt(job): - """Check the equilibration of the NVT simulation.""" - job.doc.nvt_finished = check_equilibration(job, "nvt", "potential_energy") - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("nvt_eq")) -@Project.post(lambda j: j.doc.get("post_processed")) -def post_process(job): - """Run post-processing on the log files.""" - from shutil import copy - - import numpy.lib.recfunctions as rf - import unyt as u - - for filename in ["log-npt-raw.txt", "log-nvt-raw.txt"]: - rawlogfile = job.fn(filename) - logfile = job.fn(filename.replace("-raw", "")) - - data = np.genfromtxt(rawlogfile, names=True) - data = clean_data(data) - - # Clean up headers - for k, v in header_dict.items(): - data = rf.rename_fields(data, {k: v}) - - system_mass = job.sp.mass * u.amu * job.sp.N_liquid - volume = data["volume"] * u.nm**3 - density = (system_mass / volume).to("g/cm**3") - kB = 0.00831446262 # kJ/(mol K) - pressure_factor = float((1 * u.kJ / u.mol / u.nm**3).to("kPa")) - - data = rf.drop_fields(data, ["time_remaining"]) - data["temperature"] /= kB - data["pressure"] *= pressure_factor - data = rf.append_fields( - data, "density", np.array(density), usemask=False - ) - np.savetxt(logfile, data, header=" ".join(data.dtype.names)) - job.doc.post_processed = True - - -def run_hoomd(job, method, restart=False): - """Run a simulation with HOOMD-blue.""" - import foyer - import git - import gsd.hoomd - import hoomd - import hoomd.md - import numpy as np - import unyt as u - from mbuild.formats.hoomd_forcefield import create_hoomd_forcefield - - from reproducibility_project.src.molecules.system_builder import ( - construct_system, - get_molecule, - ) - from reproducibility_project.src.utils.forcefields import load_ff - from reproducibility_project.src.utils.rigid import moit - - repo = git.Repo(search_parent_directories=True) - sha = repo.head.object.hexsha - print(job.sp.molecule) - print(f"git commit: {sha}\n") - if method not in ["npt", "nvt", "shrink"]: - raise ValueError("Method must be 'nvt', 'npt' or 'shrink'.") - - # For rigid molecules, we need to create an initial snapshot with only the - # rigid body centers - # Only the number matters at this point--all other attributes of the - # snapshot will be adjusted later. - if job.sp["molecule"] in rigid_molecules: - print("Rigid body") - isrigid = True - init_snap = hoomd.Snapshot() - init_snap.particles.types = ["R"] - N_mols = job.sp["N_liquid"] - init_snap.particles.N = N_mols - else: - isrigid = False - init_snap = None - - # This structure will only be used for the initial npt run, - # but we need forcefield info for all sims. - # Ignore the vapor box - # Initializing at high density causes issues, so instead we initialize - # with box expanded by factor - filled_box, _ = construct_system( - job.sp, scale_liq_box=2, fix_orientation=isrigid - ) - - ff = load_ff(job.sp.forcefield_name) - structure = ff.apply(filled_box) - - # ref_distance: 10 angstrom -> 1 nm - # ref_energy: 1/4.184 kcal/mol -> 1 kJ/mol - # ref_mass: 0.9999938574 dalton -> 1 amu - d = 10 - e = 1 / 4.184 - m = 0.9999938574 - - snapshot, forcefield, ref_vals = create_hoomd_forcefield( - structure, - ref_distance=d, - ref_energy=e, - ref_mass=m, - r_cut=job.sp.r_cut, - init_snap=init_snap, - pppm_kwargs={"Nx": 64, "Ny": 64, "Nz": 64, "order": 7}, - ) - print("Snapshot created") - - # If the molecule is constrained, add distance constraints to - # bonded particles in snapshot - if "constrain" in job.sp.molecule: - print("Constrained bonds") - snapshot.constraints.N = snapshot.bonds.N - snapshot.constraints.group[:] = snapshot.bonds.group[:] - constraint_vals = np.ones(snapshot.bonds.N) * 0.154 - snapshot.constraints.value[:] = constraint_vals - - # I also checked that the starting positions on each bond were - # close enough to the constraint value: - # bond_pos = snapshot.particles.position[snapshot.bonds.group] - # bond_dists = np.linalg.norm(bond_pos[:,0] - bond_pos[:,1], axis=1) - # np.allclose(constraint_vals, bond_dists) - - constrain_dist = hoomd.md.constrain.Distance() - - # Adjust the snapshot rigid bodies - if isrigid: - # number of particles per molecule - N_p = get_molecule(job.sp).n_particles - mol_inds = [ - np.arange(N_mols + i * N_p, N_mols + i * N_p + N_p) - for i in range(N_mols) - ] - for i, inds in enumerate(mol_inds): - total_mass = np.sum(snapshot.particles.mass[inds]) - # set the rigid body position at the center of mass - com = ( - np.sum( - snapshot.particles.position[inds] - * snapshot.particles.mass[inds, np.newaxis], - axis=0, - ) - / total_mass - ) - snapshot.particles.position[i] = com - # set the body attribute for the rigid center and its constituents - snapshot.particles.body[i] = i - snapshot.particles.body[inds] = i * np.ones_like(inds) - # set the rigid body center's mass - snapshot.particles.mass[i] = np.sum(snapshot.particles.mass[inds]) - # set moment of inertia - snapshot.particles.moment_inertia[i] = moit( - snapshot.particles.position[inds], - snapshot.particles.mass[inds], - center=com, - ) - - # delete the harmonic bond and angle potentials - remove = [ - f - for f in forcefield - if isinstance(f, hoomd.md.bond.Harmonic) - or isinstance(f, hoomd.md.angle.Harmonic) - or isinstance(f, hoomd.md.dihedral.Harmonic) - or isinstance(f, hoomd.md.dihedral.OPLS) - ] - for f in remove: - forcefield.remove(f) - - # update the neighborlist exclusions for rigid - # forcefield[0] is LJ pair force and all nlist objects are connected - forcefield[0].nlist.exclusions = ["body"] - - # update the neighborlist exclusions for pentane and methane - # pentane's wont be set automatically because the scaling is 0 - # and the default (bond, 1-3) is unecessary and raises a warning for methane - if job.sp.molecule.startswith("pentane"): - forcefield[0].nlist.exclusions = ["bond", "1-3", "1-4"] - elif job.sp.molecule == "methaneUA": - forcefield[0].nlist.exclusions = [] - - if job.sp.get("long_range_correction") == "energy_pressure": - for force in forcefield: - if isinstance(force, hoomd.md.pair.LJ): - force.tail_correction = True - print(f"{force} tail_correction set to {force.tail_correction}") - - device = hoomd.device.auto_select() - print(f"Running HOOMD version {hoomd.version.version}", flush=True) - if isinstance(device, hoomd.device.GPU): - print("HOOMD is running on GPU", flush=True) - print(f"GPU api version {hoomd.version.gpu_api_version}", flush=True) - else: - print("HOOMD is running on CPU", flush=True) - - if method == "shrink": - print("Starting shrink", flush=True) - sim = hoomd.Simulation(device=device, seed=job.sp.replica) - sim.create_state_from_snapshot(snapshot) - hoomd.write.GSD.write( - state=sim.state, filename=job.fn("init.gsd"), mode="wb" - ) - filled_box.save(job.fn("starting_compound.json")) - initgsd = job.fn("init.gsd") - - elif method == "npt": - print("Starting NPT", flush=True) - if restart: - print("Restarting from last frame of existing gsd", flush=True) - initgsd = job.fn("trajectory-npt.gsd") - else: - # npt overwrites snapshot information with snapshot from shrink run - initgsd = job.fn("trajectory-shrink.gsd") - - else: - print("Starting NVT", flush=True) - if restart: - print("Restarting from last frame of existing gsd", flush=True) - initgsd = job.fn("trajectory-nvt.gsd") - else: - # nvt overwrites snapshot information with snapshot from npt run - initgsd = job.fn("trajectory-npt.gsd") - - if restart: - writemode = "a" - else: - writemode = "w" - - sim = hoomd.Simulation(device=device, seed=job.sp.replica) - sim.create_state_from_gsd(initgsd) - - if isrigid: - # Because we use the fix_orientation flag with fill box, we can safely - # assume that all the rigid body constituent particles have the same - # orientation around the rigid body center. Therefore we can define all - # rigid bodies using just the first one - rigid = hoomd.md.constrain.Rigid() - inds = mol_inds[0] - - r_pos = snapshot.particles.position[0] - c_pos = snapshot.particles.position[inds] - c_pos -= r_pos - c_pos = [tuple(i) for i in c_pos] - - c_types = [ - snapshot.particles.types[i] for i in snapshot.particles.typeid[inds] - ] - - c_orient = [tuple(i) for i in snapshot.particles.orientation[inds]] - - c_charge = [i for i in snapshot.particles.charge[inds]] - - c_diam = [i for i in snapshot.particles.diameter[inds]] - - rigid.body["R"] = { - "constituent_types": c_types, - "positions": c_pos, - "orientations": c_orient, - "charges": c_charge, - "diameters": c_diam, - } - - for force in forcefield: - if isinstance(force, hoomd.md.pair.LJ): - for t in snapshot.particles.types: - force.params[("R", t)] = dict(epsilon=0, sigma=0) - force.r_cut[("R", t)] = 0 - - _all = hoomd.filter.Rigid(("center", "free")) - else: - _all = hoomd.filter.All() - - gsd_writer = hoomd.write.GSD( - filename=job.fn(f"trajectory-{method}.gsd"), - trigger=hoomd.trigger.Periodic(10000), - mode=f"{writemode}b", - dynamic=["momentum"], - ) - sim.operations.writers.append(gsd_writer) - - logger = hoomd.logging.Logger(categories=["scalar", "string"]) - logger.add(sim, quantities=["timestep", "tps"]) - thermo_props = hoomd.md.compute.ThermodynamicQuantities(filter=_all) - sim.operations.computes.append(thermo_props) - logger.add( - thermo_props, - quantities=[ - "kinetic_energy", - "potential_energy", - "pressure", - "kinetic_temperature", - "volume", - ], - ) - - status = Status(sim) - logger[("Status", "time_remaining")] = (status, "time_remaining", "string") - - for f in forcefield: - logger.add(f, quantities=["energy"]) - - table_file = hoomd.write.Table( - output=open( - job.fn(f"log-{method}-raw.txt"), mode=f"{writemode}", newline="\n" - ), - trigger=hoomd.trigger.Periodic(period=1000), - logger=logger, - ) - sim.operations.writers.append(table_file) - - if job.sp.molecule == "ethanolAA": - # ethanol needs a smaller timestep to capture fast oscillations of - # explicit hydrogen bonds (period 0.0118 time units) - dt = 0.0005 - else: - dt = 0.001 - if isrigid: - integrator = hoomd.md.Integrator(dt=dt, integrate_rotational_dof=True) - integrator.rigid = rigid - else: - integrator = hoomd.md.Integrator(dt=dt) - if "constrain" in job.sp.molecule: - integrator.constraints = [constrain_dist] - integrator.forces = forcefield - - # convert temp in K to kJ/mol - kT = float( - (job.sp.temperature * u.K).to_equivalent("kJ/mol", "thermal").value - ) - print(f"kT = {kT}") - - # start with high tau and tauS - tau = 0.5 # 1000dt (ethanol) or 500dt - tauS = 1 # 2000dt (ethanol) or 1000dt - - if method == "npt": - # convert pressure to unit system - pressure = float((job.sp.pressure * u.kPa).to("kJ/(mol*nm**3)").value) - print(f"Pressure = {pressure}") - integrator_method = hoomd.md.methods.NPT( - filter=_all, - kT=kT, - tau=tau, - S=pressure, - tauS=tauS, - couple="xyz", - ) - else: - # shrink and nvt both use nvt - integrator_method = hoomd.md.methods.NVT(filter=_all, kT=kT, tau=tau) - - integrator.methods = [integrator_method] - sim.operations.integrator = integrator - if not restart: - sim.state.thermalize_particle_momenta(filter=_all, kT=kT) - - if method == "npt": - # only run with high tauS if we are starting from scratch - if not restart: - steps = 1e6 - print( - f"Running {steps:.0e} with tau: {integrator.methods[0].tau} " - f"and tauS: {integrator.methods[0].tauS}" - ) - sim.run(steps) - print("Done") - - else: - if not restart: - # Shrink and NVT both use NVT method - if method == "shrink": - # shrink to the desired box length - L = job.sp.box_L_liq - shrink_steps = 1e5 - else: - # The target volume should be the average volume from NPT - target_volume = job.doc.avg_volume - L = target_volume ** (1 / 3) - shrink_steps = 2e4 - - # Shrink step follows this example - # https://hoomd-blue.readthedocs.io/en/latest/tutorial/ - # 01-Introducing-Molecular-Dynamics/03-Compressing-the-System.html - ramp = hoomd.variant.Ramp( - A=0, B=1, t_start=sim.timestep, t_ramp=int(shrink_steps) - ) - initial_box = sim.state.box - final_box = hoomd.Box(Lx=L, Ly=L, Lz=L) - box_resize_trigger = hoomd.trigger.Periodic(10) - box_resize = hoomd.update.BoxResize( - box1=initial_box, - box2=final_box, - variant=ramp, - trigger=box_resize_trigger, - ) - sim.operations.updaters.append(box_resize) - print( - f"Running shrink {shrink_steps:.0e} with tau: " - f"{integrator.methods[0].tau}" - ) - sim.run(shrink_steps + 1) - print("Done") - assert sim.state.box == final_box - sim.operations.updaters.remove(box_resize) - - integrator.methods[0].tau = 0.1 - if method != "shrink": - steps = 2e7 - if method == "npt": - integrator.methods[0].tauS = 1 - print( - f"Running {steps:.0e} with tau: {integrator.methods[0].tau} " - f"and tauS: {integrator.methods[0].tauS}" - ) - else: - print(f"Running {steps:.0e} with tau: {integrator.methods[0].tau}") - sim.run(steps) - else: - # Try adding a short temperature equilibration step after shrink - steps = 1e4 - print(f"Running {steps:.0e} with tau: {integrator.methods[0].tau}") - sim.run(steps) - - job.doc[f"{method}_finished"] = True - print("Finished", flush=True) - - -def check_equilibration(job, method, eq_property, min_t0=100): - """Check whether a simulation is equilibrated.""" - import numpy as np - from pymbar.timeseries import subsampleCorrelatedData as subsample - - import reproducibility_project.src.analysis.equilibration as eq - - data = np.genfromtxt(job.fn(f"log-{method}-raw.txt"), names=True) - data = clean_data(data) - prop_data = data[f"mdcomputeThermodynamicQuantities{eq_property}"] - iseq, _, _, _ = eq.is_equilibrated(prop_data) - if iseq: - uncorr, t0, g, N = eq.trim_non_equilibrated(prop_data) - # Sometimes the trim_non_equilibrated function does not cut off enough - # of the early fluctuating data - if t0 < min_t0: - uncorr = prop_data[min_t0:] - indices = subsample(uncorr, g=g, conservative=True) - job.doc[f"avg_{eq_property}"] = np.average(prop_data[indices]) - job.doc[f"std_{eq_property}"] = np.std(prop_data[indices]) - job.doc[f"{method}_eq"] = iseq - return iseq - - -def clean_data(data): - """Delete rows in numpy array which contain nan values. - - The HOOMD Table file writer always writes a header, but for restarted jobs, - this header is in the middle of the file, which creates nan values. - """ - return np.delete( - data, np.where(np.isnan(data["Simulationtimestep"]))[0], axis=0 - ) - - -class Status: - """Monitor the status of a simulation.""" - - def __init__(self, sim): - self.sim = sim - - @property - def seconds_remaining(self): - """Compute the seconds remaining based on the current TPS.""" - try: - return (self.sim.final_timestep - self.sim.timestep) / self.sim.tps - except ZeroDivisionError: - return 0 - - @property - def time_remaining(self): - """Estimate the time remaining.""" - return str(datetime.timedelta(seconds=self.seconds_remaining)) - - -if __name__ == "__main__": - pr = Project() - pr.main() diff --git a/reproducibility_project/src/engines/hoomd/project_rahman.py b/reproducibility_project/src/engines/hoomd/project_rahman.py deleted file mode 100644 index 5c029cf4..00000000 --- a/reproducibility_project/src/engines/hoomd/project_rahman.py +++ /dev/null @@ -1,552 +0,0 @@ -"""Setup for signac, signac-flow, signac-dashboard for this study.""" - -import datetime -import os -import pathlib - -import flow -import numpy as np -from flow import FlowProject -from flow.environment import DefaultSlurmEnvironment - -rigid_molecules = ["waterSPCE", "benzeneUA"] - - -# class Project(FlowProject): -# """Subclass of FlowProject to provide custom methods and attributes.""" - -# def __init__(self): -# super().__init__() -# current_path = pathlib.Path(os.getcwd()).absolute() -# self.data_dir = current_path.parents[1] / "data" -# self.ff_fn = self.data_dir / "forcefield.xml" - - -# class Fry(DefaultSlurmEnvironment): -# """Subclass of DefaultSlurmEnvironment for BSU's Fry cluster.""" - -# hostname_pattern = "fry.boisestate.edu" -# template = "fry.sh" - -# @classmethod -# def add_args(cls, parser): -# """Add command line arguments to the submit call.""" -# parser.add_argument( -# "--partition", -# default="batch", -# help="Specify the partition to submit to.", -# ) -# parser.add_argument("--nodelist", help="Specify the node to submit to.") - - -class Project(flow.FlowProject): - """Subclass of FlowProject to provide custom methods and attributes.""" - - def __init__(self): - super().__init__() - current_path = pathlib.Path(os.getcwd()).absolute() - self.data_dir = current_path.parents[1] / "data" - self.ff_fn = self.data_dir / "forcefield.xml" - - -class Rahman(DefaultSlurmEnvironment): - """Subclass of DefaultSlurmEnvironment for VU's Rahman cluster.""" - - template = "rahman_hoomd.sh" - - @classmethod - def add_args(cls, parser): - """Add command line arguments to the submit call.""" - parser.add_argument( - "--walltime", - type=float, - default=96, - help="Walltime for this submission", - ) - - -# The MOSDEF_PYTHON environment variable is set by running -# echo "export MOSDEF_PYTHON=$(which python)" >> ~/.bashrc -# with the mosdef-study38 conda env active -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.post(lambda j: j.doc.get("shrink_finished")) -def run_shrink(job): - """Initialize volume for simulation with HOOMD-blue.""" - run_hoomd(job, "shrink") - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("npt_eq")) -@Project.post(lambda j: j.doc.get("nvt_finished")) -def run_nvt(job): - """Run an NVT simulation with HOOMD-blue.""" - run_hoomd(job, "nvt", restart=job.isfile("trajectory-nvt.gsd")) - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("shrink_finished")) -@Project.post(lambda j: j.doc.get("npt_finished")) -def run_npt(job): - """Run an NPT simulation with HOOMD-blue.""" - run_hoomd(job, "npt", restart=job.isfile("trajectory-npt.gsd")) - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("npt_finished")) -@Project.post(lambda j: j.doc.get("npt_eq")) -def check_equilibration_npt(job): - """Check the equilibration of the NPT simulation.""" - job.doc.npt_finished = check_equilibration(job, "npt", "volume") - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("nvt_finished")) -@Project.post(lambda j: j.doc.get("nvt_eq")) -def check_equilibration_nvt(job): - """Check the equilibration of the NVT simulation.""" - job.doc.nvt_finished = check_equilibration(job, "nvt", "potential_energy") - - -@Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -@Project.pre(lambda j: j.sp.engine == "hoomd") -@Project.pre(lambda j: j.doc.get("nvt_eq")) -@Project.post(lambda j: j.doc.get("post_processed")) -def post_process(job): - """Run post-processing on the log files.""" - from shutil import copy - - import numpy.lib.recfunctions as rf - import unyt as u - - for logfile in [job.fn("log-npt.txt"), job.fn("log-nvt.txt")]: - # Make a copy, just in case - backup = f"{logfile}.bkup" - if not os.path.exists(backup): - copy(logfile, backup) - - data = np.genfromtxt(logfile, names=True) - data = clean_data(data) - - system_mass = job.sp.mass * u.amu * job.sp.N_liquid - volume = data["volume"] * u.nm**3 - density = (system_mass / volume).to("g/cm**3") - kB = 0.00831446262 # kJ/(mol K) - pressure_factor = float((1 * u.kJ / u.mol / u.nm**3).to("kPa")) - - data = rf.drop_fields(data, ["time_remaining"]) - data = rf.rename_fields(data, {"kinetic_temperature": "temperature"}) - data["temperature"] /= kB - data["pressure"] *= pressure_factor - data = rf.append_fields( - data, "density", np.array(density), usemask=False - ) - np.savetxt(logfile, data, header=" ".join(data.dtype.names)) - job.doc.post_processed = True - - -def run_hoomd(job, method, restart=False): - """Run a simulation with HOOMD-blue.""" - import foyer - import gsd.hoomd - import hoomd - import hoomd.md - import numpy as np - import unyt as u - from mbuild.formats.hoomd_forcefield import create_hoomd_forcefield - - from reproducibility_project.src.molecules.system_builder import ( - construct_system, - get_molecule, - ) - from reproducibility_project.src.utils.forcefields import load_ff - from reproducibility_project.src.utils.rigid import moit - - print(job.sp.molecule) - if method not in ["npt", "nvt", "shrink"]: - raise ValueError("Method must be 'nvt', 'npt' or 'shrink'.") - - # For rigid molecules, we need to create an initial snapshot with only the - # rigid body centers - # Only the number matters at this point--all other attributes of the - # snapshot will be adjusted later. - if job.sp["molecule"] in rigid_molecules: - isrigid = True - init_snap = hoomd.Snapshot() - init_snap.particles.types = ["R"] - N_mols = job.sp["N_liquid"] - init_snap.particles.N = N_mols - else: - isrigid = False - init_snap = None - - # This structure will only be used for the initial npt run, - # but we need forcefield info for all sims. - # Ignore the vapor box - # Initializing at high density causes issues, so instead we initialize - # with box expanded by factor - filled_box, _ = construct_system( - job.sp, scale_liq_box=2, fix_orientation=isrigid - ) - - ff = load_ff(job.sp.forcefield_name) - structure = ff.apply(filled_box) - - # ref_distance: 10 angstrom -> 1 nm - # ref_energy: 1/4.184 kcal/mol -> 1 kJ/mol - # ref_mass: 0.9999938574 dalton -> 1 amu - d = 10 - e = 1 / 4.184 - m = 0.9999938574 - - snapshot, forcefield, ref_vals = create_hoomd_forcefield( - structure, - ref_distance=d, - ref_energy=e, - ref_mass=m, - r_cut=job.sp.r_cut, - init_snap=init_snap, - pppm_kwargs={"Nx": 64, "Ny": 64, "Nz": 64, "order": 7}, - ) - - # update the neighborlist exclusions for pentane and benzene - # these wont be set automatically because their scaling is 0 - # forcefield[0] is LJ pair force and all nlist objects are connected - if job.sp.molecule == "benzeneUA" or job.sp.molecule == "pentaneUA": - forcefield[0].nlist.exclusions = ["bond", "1-3", "1-4"] - if job.sp.molecule == "methaneUA": - forcefield[0].nlist.exclusions = [] - - # Adjust the snapshot rigid bodies - if isrigid: - # number of particles per molecule - N_p = get_molecule(job.sp).n_particles - mol_inds = [ - np.arange(N_mols + i * N_p, N_mols + i * N_p + N_p) - for i in range(N_mols) - ] - for i, inds in enumerate(mol_inds): - total_mass = np.sum(snapshot.particles.mass[inds]) - # set the rigid body position at the center of mass - com = ( - np.sum( - snapshot.particles.position[inds] - * snapshot.particles.mass[inds, np.newaxis], - axis=0, - ) - / total_mass - ) - snapshot.particles.position[i] = com - # set the body attribute for the rigid center and its constituents - snapshot.particles.body[i] = i - snapshot.particles.body[inds] = i * np.ones_like(inds) - # set the rigid body center's mass - snapshot.particles.mass[i] = np.sum(snapshot.particles.mass[inds]) - # set moment of inertia - snapshot.particles.moment_inertia[i] = moit( - snapshot.particles.position[inds], - snapshot.particles.mass[inds], - center=com, - ) - - # delete the harmonic bond and angle potentials - remove = [ - f - for f in forcefield - if isinstance(f, hoomd.md.bond.Harmonic) - or isinstance(f, hoomd.md.angle.Harmonic) - or isinstance(f, hoomd.md.dihedral.Harmonic) - or isinstance(f, hoomd.md.dihedral.OPLS) - ] - for f in remove: - forcefield.remove(f) - - # update the neighborlist exclusions for rigid - forcefield[0].nlist.exclusions = ["body"] - - if job.sp.get("long_range_correction") == "energy_pressure": - for force in forcefield: - if isinstance(force, hoomd.md.pair.LJ): - force.tail_correction = True - print(f"{force} tail_correction set to {force.tail_correction}") - - device = hoomd.device.auto_select() - print(f"Running HOOMD version {hoomd.version.version}", flush=True) - if isinstance(device, hoomd.device.GPU): - print("HOOMD is running on GPU", flush=True) - print(f"GPU api version {hoomd.version.gpu_api_version}", flush=True) - else: - print("HOOMD is running on CPU", flush=True) - - if method == "shrink": - print("Starting shrink", flush=True) - sim = hoomd.Simulation(device=device, seed=job.sp.replica) - sim.create_state_from_snapshot(snapshot) - hoomd.write.GSD.write( - state=sim.state, filename=job.fn("init.gsd"), mode="wb" - ) - filled_box.save(job.fn("starting_compound.json")) - initgsd = job.fn("init.gsd") - - elif method == "npt": - print("Starting NPT", flush=True) - if restart: - print("Restarting from last frame of existing gsd", flush=True) - initgsd = job.fn("trajectory-npt.gsd") - else: - # npt overwrites snapshot information with snapshot from shrink run - initgsd = job.fn("trajectory-shrink.gsd") - - else: - print("Starting NVT", flush=True) - if restart: - print("Restarting from last frame of existing gsd", flush=True) - initgsd = job.fn("trajectory-nvt.gsd") - else: - # nvt overwrites snapshot information with snapshot from npt run - initgsd = job.fn("trajectory-npt.gsd") - - if restart: - writemode = "a" - else: - writemode = "w" - - sim = hoomd.Simulation(device=device, seed=job.sp.replica) - sim.create_state_from_gsd(initgsd) - - if isrigid: - # Because we use the fix_orientation flag with fill box, we can safely - # assume that all the rigid body constituent particles have the same - # orientation around the rigid body center. Therefore we can define all - # rigid bodies using just the first one - rigid = hoomd.md.constrain.Rigid() - inds = mol_inds[0] - - r_pos = snapshot.particles.position[0] - c_pos = snapshot.particles.position[inds] - c_pos -= r_pos - c_pos = [tuple(i) for i in c_pos] - - c_types = [ - snapshot.particles.types[i] for i in snapshot.particles.typeid[inds] - ] - - c_orient = [tuple(i) for i in snapshot.particles.orientation[inds]] - - c_charge = [i for i in snapshot.particles.charge[inds]] - - c_diam = [i for i in snapshot.particles.diameter[inds]] - - rigid.body["R"] = { - "constituent_types": c_types, - "positions": c_pos, - "orientations": c_orient, - "charges": c_charge, - "diameters": c_diam, - } - - for force in forcefield: - if isinstance(force, hoomd.md.pair.LJ): - for t in snapshot.particles.types: - force.params[("R", t)] = dict(epsilon=0, sigma=0) - force.r_cut[("R", t)] = 0 - - _all = hoomd.filter.Rigid(("center", "free")) - else: - _all = hoomd.filter.All() - - gsd_writer = hoomd.write.GSD( - filename=job.fn(f"trajectory-{method}.gsd"), - trigger=hoomd.trigger.Periodic(10000), - mode=f"{writemode}b", - dynamic=["property", "momentum"], - ) - sim.operations.writers.append(gsd_writer) - - logger = hoomd.logging.Logger(categories=["scalar", "string"]) - logger.add(sim, quantities=["timestep", "tps"]) - thermo_props = hoomd.md.compute.ThermodynamicQuantities(filter=_all) - sim.operations.computes.append(thermo_props) - logger.add( - thermo_props, - quantities=[ - "kinetic_energy", - "potential_energy", - "pressure", - "kinetic_temperature", - "volume", - ], - ) - - status = Status(sim) - logger[("Status", "time_remaining")] = (status, "time_remaining", "string") - table_file = hoomd.write.Table( - output=open( - job.fn(f"log-{method}.txt"), mode=f"{writemode}", newline="\n" - ), - trigger=hoomd.trigger.Periodic(period=1000), - logger=logger, - max_header_len=7, - ) - sim.operations.writers.append(table_file) - - dt = 0.001 - if isrigid: - integrator = hoomd.md.Integrator(dt=dt, integrate_rotational_dof=True) - integrator.rigid = rigid - else: - integrator = hoomd.md.Integrator(dt=dt) - integrator.forces = forcefield - - # convert temp in K to kJ/mol - kT = (job.sp.temperature * u.K).to_equivalent("kJ/mol", "thermal").value - - # start with high tau and tauS - tau = 1000 * dt - tauS = 5000 * dt - - if method == "npt": - # convert pressure to unit system - pressure = (job.sp.pressure * u.kPa).to("kJ/(mol*nm**3)").value - integrator_method = hoomd.md.methods.NPT( - filter=_all, - kT=kT, - tau=tau, - S=pressure, - tauS=tauS, - couple="xyz", - ) - else: - # shrink and nvt both use nvt - integrator_method = hoomd.md.methods.NVT(filter=_all, kT=kT, tau=tau) - - integrator.methods = [integrator_method] - sim.operations.integrator = integrator - if not restart: - sim.state.thermalize_particle_momenta(filter=_all, kT=kT) - - if method == "npt": - # only run with high tauS if we are starting from scratch - if not restart: - steps = 1e6 - print( - f"""Running {steps} with tau: {integrator.methods[0].tau} and - tauS: {integrator.methods[0].tauS}""" - ) - sim.run(steps) - print("Done") - integrator.methods[0].tauS = 1000 * dt - integrator.methods[0].tau = 100 * dt - else: - # Shrink and NVT both use NVT method - if method == "shrink": - # shrink to the desired box length - L = job.sp.box_L_liq - shrink_steps = 1e5 - else: - # The target volume should be the average volume from NPT - target_volume = job.doc.avg_volume - L = target_volume ** (1 / 3) - shrink_steps = 2e4 - - if not restart: - # Shrink and nvt methods both use shrink step - # Shrink step follows this example - # https://hoomd-blue.readthedocs.io/en/latest/tutorial/ - # 01-Introducing-Molecular-Dynamics/03-Compressing-the-System.html - ramp = hoomd.variant.Ramp( - A=0, B=1, t_start=sim.timestep, t_ramp=int(shrink_steps) - ) - initial_box = sim.state.box - final_box = hoomd.Box(Lx=L, Ly=L, Lz=L) - box_resize_trigger = hoomd.trigger.Periodic(10) - box_resize = hoomd.update.BoxResize( - box1=initial_box, - box2=final_box, - variant=ramp, - trigger=box_resize_trigger, - ) - sim.operations.updaters.append(box_resize) - print( - f"Running shrink {shrink_steps} with tau: {integrator.methods[0].tau}" - ) - sim.run(shrink_steps + 1) - print("Done") - assert sim.state.box == final_box - sim.operations.updaters.remove(box_resize) - integrator.methods[0].tau = 100 * dt - - if method != "shrink": - steps = 5e6 - if method == "npt": - print( - f"""Running {steps} with tau: {integrator.methods[0].tau} and - tauS: {integrator.methods[0].tauS}""" - ) - else: - print(f"Running {steps} with tau: {integrator.methods[0].tau}") - sim.run(steps) - job.doc[f"{method}_finished"] = True - print("Finished", flush=True) - - -def check_equilibration(job, method, eq_property, min_t0=100): - """Check whether a simulation is equilibrated.""" - import numpy as np - from pymbar.timeseries import subsampleCorrelatedData as subsample - - import reproducibility_project.src.analysis.equilibration as eq - - data = np.genfromtxt(job.fn(f"log-{method}.txt"), names=True) - data = clean_data(data) - prop_data = data[eq_property] - iseq, _, _, _ = eq.is_equilibrated(prop_data) - if iseq: - uncorr, t0, g, N = eq.trim_non_equilibrated(prop_data) - # Sometimes the trim_non_equilibrated function does not cut off enough - # of the early fluctuating data - if t0 < min_t0: - uncorr = prop_data[min_t0:] - indices = subsample(uncorr, g=g, conservative=True) - job.doc[f"avg_{eq_property}"] = np.average(prop_data[indices]) - job.doc[f"std_{eq_property}"] = np.std(prop_data[indices]) - job.doc[f"{method}_eq"] = iseq - return iseq - - -def clean_data(data): - """Delete rows in numpy array which contain nan values. - - The HOOMD Table file writer always writes a header, but for restarted jobs, - this header is in the middle of the file, which creates nan values. - """ - return np.delete(data, np.where(np.isnan(data["timestep"]))[0], axis=0) - - -class Status: - """Monitor the status of a simulation.""" - - def __init__(self, sim): - self.sim = sim - - @property - def seconds_remaining(self): - """Compute the seconds remaining based on the current TPS.""" - try: - return (self.sim.final_timestep - self.sim.timestep) / self.sim.tps - except ZeroDivisionError: - return 0 - - @property - def time_remaining(self): - """Estimate the time remaining.""" - return str(datetime.timedelta(seconds=self.seconds_remaining)) - - -if __name__ == "__main__": - pr = Project() - pr.main() diff --git a/reproducibility_project/hoomd4_subproject/src/engines/hoomd4/__init__.py b/reproducibility_project/src/engines/hoomd4/__init__.py similarity index 100% rename from reproducibility_project/hoomd4_subproject/src/engines/hoomd4/__init__.py rename to reproducibility_project/src/engines/hoomd4/__init__.py diff --git a/reproducibility_project/hoomd4_subproject/environment.yml b/reproducibility_project/src/engines/hoomd4/environment.yml similarity index 97% rename from reproducibility_project/hoomd4_subproject/environment.yml rename to reproducibility_project/src/engines/hoomd4/environment.yml index c4cca417..a5fd4add 100644 --- a/reproducibility_project/hoomd4_subproject/environment.yml +++ b/reproducibility_project/src/engines/hoomd4/environment.yml @@ -29,7 +29,7 @@ dependencies: - rdkit - scipy=1.9.0 - seaborn - - signac + - signac>=2 - signac-dashboard - signac-flow - unyt diff --git a/reproducibility_project/hoomd4_subproject/src/engines/hoomd4/project.py b/reproducibility_project/src/engines/hoomd4/project.py similarity index 96% rename from reproducibility_project/hoomd4_subproject/src/engines/hoomd4/project.py rename to reproducibility_project/src/engines/hoomd4/project.py index db0868c7..985da6ba 100644 --- a/reproducibility_project/hoomd4_subproject/src/engines/hoomd4/project.py +++ b/reproducibility_project/src/engines/hoomd4/project.py @@ -59,15 +59,6 @@ def run_shrink(job): run_hoomd(job, "shrink") -# @Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -# @Project.pre(lambda j: j.sp.engine == "hoomd") -# @Project.pre(lambda j: j.doc.get("npt_eq")) -# @Project.post(lambda j: j.doc.get("nvt_finished")) -# def run_nvt(job): -# """Run an NVT simulation with HOOMD-blue.""" -# run_hoomd(job, "nvt", restart=job.isfile("trajectory-nvt.gsd")) - - @Project.pre(lambda j: j.sp.engine == "hoomd") @Project.pre(lambda j: j.doc.get("shrink_finished")) @Project.post(lambda j: j.doc.get("npt_finished")) @@ -100,15 +91,6 @@ def check_equilibration_npt(job): job.doc.npt_finished = check_equilibration(job, "npt", "volume") -# @Project.operation.with_directives({"executable": "$MOSDEF_PYTHON", "ngpu": 1}) -# @Project.pre(lambda j: j.sp.engine == "hoomd") -# @Project.pre(lambda j: j.doc.get("nvt_finished")) -# @Project.post(lambda j: j.doc.get("nvt_eq")) -# def check_equilibration_nvt(job): -# """Check the equilibration of the NVT simulation.""" -# job.doc.nvt_finished = check_equilibration(job, "nvt", "potential_energy") - - @Project.pre(lambda j: j.sp.engine == "hoomd") @Project.pre(lambda j: j.doc.get("npt_eq")) @Project.post(lambda j: j.doc.get("post_processed")) @@ -127,6 +109,8 @@ def post_process(job): import numpy.lib.recfunctions as rf import unyt as u + # Create a clean version of log-npt.txt file to be in consistent format with others. + # All the data points are preserved (not trimmed). for filename in ["log-npt-raw.txt"]: rawlogfile = job.fn(filename) logfile = job.fn(filename.replace("-raw", "")) @@ -419,6 +403,7 @@ def run_hoomd(job, method, restart=False): for f in forcefield: logger.add(f, quantities=["energy"]) + # Write out the data to raw file table_file = hoomd.write.Table( output=open( job.fn(f"log-{method}-raw.txt"), mode=f"{writemode}", newline="\n" @@ -541,6 +526,13 @@ def check_equilibration(job, method, eq_property, min_t0=100): data = clean_data(data) prop_data = data[f"mdcomputeThermodynamicQuantities{eq_property}"] iseq, _, _, _ = eq.is_equilibrated(prop_data) + + # Note: The following block generates average property value stored hoomd4 job.doc. + # However, this value is not used in the final data analysis + # (combining all data from all simulation engines) + # The final analysis (defined in project-analysis.py) performs on the log-{method}-raw.txt + # and utilized slightly different threshold for equilibraiton detection to ensure + # overall consistency across engines. if iseq: uncorr, t0, g, N = eq.trim_non_equilibrated(prop_data) # Sometimes the trim_non_equilibrated function does not cut off enough diff --git a/reproducibility_project/src/engines/lammps-UD/project.py b/reproducibility_project/src/engines/lammps-UD/project.py deleted file mode 100644 index b34aca43..00000000 --- a/reproducibility_project/src/engines/lammps-UD/project.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Setup for signac, signac-flow, signac-dashboard for this study.""" - -# import foyer -import os -import pathlib - -import flow -from flow import environments - - -class Project(flow.FlowProject): - """Subclass of FlowProject to provide custom methods and attributes.""" - - def __init__(self): - super().__init__() - current_path = pathlib.Path(os.getcwd()).absolute() - self.data_dir = current_path.parents[1] / "data" - self.ff_fn = self.data_dir / "forcefield.xml" - - -# ____________________________________________________________________________ -"""Setting progress label""" - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_created_box(job): - """Check if the lammps simulation box has been created for the job.""" - return job.isfile("box.lammps") - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_copy_files(job): - """Check if the submission scripts have been copied over for the job.""" - return job.isfile("submit.pbs") - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_minimized_equilibrated_nvt(job): - """Check if the lammps minimization step has run for the job.""" - return job.isfile("minimized.restart_0") - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_equilibrated_npt(job): - """Check if the lammps equilibration step has run and passed is_equilibrated for the job.""" - return job.isfile("equilibrated_npt.restart") and True - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_production(job): - """Check if the lammps production step has run for the job.""" - return job.isfile("production.restart") - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_density_data(job): - """Check if lammps has output density information for the job.""" - return job.isfile("density.dat") - - -@Project.label -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -def lammps_created_gsd(job): - """Check if the mdtraj has converted the production to a gsd trajectory for the job.""" - return job.isfile("prod.gsd") - - -# _____________________________________________________________________ -"""Setting up workflow operation""" - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.post(lammps_created_box) -@flow.with_job -@flow.cmd -def built_lammps(job): - """Create initial configurations of the system statepoint.""" - from mbuild.formats.lammpsdata import write_lammpsdata - from project.src.molecules.system_builder import SystemBuilder - - system = SystemBuilder(job) - parmed_structure = system.to_parmed() - # Apply forcefield from statepoint - if job.sp.forcefield_name == "trappe-ua": - ff = foyer.Forcefield(name="trappe-ua") - elif job.sp.forcefield_name == "oplsaa": - ff = foyer.Forcefield(name="oplsaa") - elif job.sp.forcefield_name == "spce": - ff = foyer.Forcefield( - name="spce" - ) # TODO: Make sure this gets applied correctly - else: - raise Exception( - "No forcefield has been applied to this system {}".format(job.id) - ) - typed_surface = ff.apply(parmed_structure) - write_lammpsdata( - system, - "box.lammps", - atom_style="full", - unit_style="real", - mins=system.get_boundingbox().vectors[0], - maxs=system.get_boundingbox().vectors[1], - use_rb_torsions=True, - ) - return - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.pre(lammps_created_box) -@Project.post(lammps_copy_files) -@flow.with_job -@flow.cmd -def lammps_cp_files(job): - """Copy over run files for lammps and the SLURM scheduler.""" - lmps_submit_path = "../../src/engine_input/lammps/UD_scripts/submit.slurm" - lmps_run_path = "../../src/engine_input/lammps/input_scripts/in.*" - msg = f"cp {lmps_submit_path} {lmps_run_path} ./" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.pre(lammps_copy_files) -@Project.post(lammps_minimized_equilibrated_nvt) -@flow.with_job -@flow.cmd -def lammps_em_nvt(job): - """Run energy minimization and nvt ensemble.""" - in_script_name = "in.minimize" - modify_submit_lammps(in_script_name, job.sp) - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica} {job.sp.temperature} {job.sp.pressure} {job.sp.cutoff}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.pre(lammps_minimized_equilibrated_nvt) -@Project.post(lammps_equilibrated_npt) -@flow.with_job -@flow.cmd -def lammps_equil_npt(job): - """Run npt ensemble equilibration.""" - in_script_name = "in.equil" - modify_submit_lammps(in_script_name, job.sp) - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica} {job.sp.temperature} {job.sp.pressure} {job.sp.cutoff}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.pre(lammps_equilibrated_npt) -@Project.post(lammps_production) -@flow.with_job -@flow.cmd -def lammps_prod(job): - """Run npt ensemble production.""" - in_script_name = "in.prod" - modify_submit_lammps(in_script_name, job.sp) - msg = f"sbatch submit.slurm {in_script_name} {job.sp.replica} {job.sp.temperature} {job.sp.pressure} {job.sp.cutoff}" - return msg - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.pre(lammps_production) -@flow.with_job -@flow.cmd -def lammps_calc_density(job): - """Create a density text file.""" - return - - -@Project.operation -@Project.pre(lambda j: j.sp.simulation_engine == "lammps-UD") -@Project.pre(lammps_production) -@flow.with_job -@flow.cmd -def lammps_calc_rdf(job): - """Create an rdf from the gsd file using Freud analysis scripts.""" - import mbuild as mb - import MDAnalysis as mda - - traj = mda.coordinates.XTC.XTCReader("prod.xtc") - top = mda.topology.LAMMPSParser.DATAParser("box.lammps") - u = mda.Universe(top, traj) - u.trajectory.next(-1) - parmed_structure = u.convert_to("PARMED") - mb.formats.gsdwriter.write_gsd(parmed_structure, "prod.gsd") - # TODO: Use freud rdf PR to create an RDF from the gsd file - return - - -# TODO: modify this for your purpose -def modify_submit_lammps(filename, statepoint): - """Modify the submission scripts to include the job and simulation type in the header.""" - with open("submit.slurm", "r") as f: - lines = f.readlines() - lines[1] = "#SBATCH -J {}{}\n".format(filename, statepoint) - with open("submit.slurm", "w") as f: - f.write(lines) - return - - -if __name__ == "__main__": - pr = Project() - pr.main() - breakpoint() diff --git a/reproducibility_project/tests/test_diffusion.py b/reproducibility_project/tests/test_diffusion.py deleted file mode 100644 index b0d8dfe1..00000000 --- a/reproducibility_project/tests/test_diffusion.py +++ /dev/null @@ -1,21 +0,0 @@ -import freud -import numpy as np - -from reproducibility_project.src.analysis.diffusion import _gsd_msd, gsd_msd -from reproducibility_project.tests.base_test import BaseTest - - -class TestDiffusion(BaseTest): - """Tests to ensure MSD/diffusion behaves as expected.""" - - def test_diffusion(self, job_gsdfile): - msd = gsd_msd(job_gsdfile) - assert job_gsdfile.isfile("msd.txt") - assert job_gsdfile.isfile("msd.png") - assert job_gsdfile.doc.get("diffusion_coefficient") is not None - - def test_gsdfile_random(self, gsdfile_random): - msd, timesteps = _gsd_msd(gsdfile_random, 0, 1, False) - assert isinstance(msd, freud.msd.MSD) - assert np.isclose(max(msd.msd), 50.0015816) - assert len(msd.msd) == len(timesteps) diff --git a/reproducibility_project/waterspce_nist_subproject/aggregate_summary/aggregate_init.py b/reproducibility_project/waterspce_nist_subproject/aggregate_summary/aggregate_init.py index a3957584..d05b3c91 100644 --- a/reproducibility_project/waterspce_nist_subproject/aggregate_summary/aggregate_init.py +++ b/reproducibility_project/waterspce_nist_subproject/aggregate_summary/aggregate_init.py @@ -21,7 +21,7 @@ def dict_product(dd): "gromacs", "lammps-VU", ] -md_engines = ["gromacs", "hoomd", "lammps-VU", "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc"] forcefields = {} r_cuts = {} diff --git a/reproducibility_project/waterspce_nist_subproject/subproject-analysis.py b/reproducibility_project/waterspce_nist_subproject/subproject-analysis.py index d7fa64b2..cf14e6cf 100644 --- a/reproducibility_project/waterspce_nist_subproject/subproject-analysis.py +++ b/reproducibility_project/waterspce_nist_subproject/subproject-analysis.py @@ -36,7 +36,7 @@ def add_args(cls, parser): mc_engines = ("gomc", "mcccs", "cassandra") -md_engines = ("gromacs", "hoomd", "lammps-VU") # , "lammps-UD") +md_engines = ("gromacs", "hoomd", "lammps-VU") md_npt_props = [ "potential_energy", "kinetic_energy", diff --git a/reproducibility_project/waterspce_nist_subproject/subproject-init.py b/reproducibility_project/waterspce_nist_subproject/subproject-init.py index 640b031c..4b77bed7 100644 --- a/reproducibility_project/waterspce_nist_subproject/subproject-init.py +++ b/reproducibility_project/waterspce_nist_subproject/subproject-init.py @@ -22,7 +22,7 @@ def dict_product(dd): "gromacs", "lammps-VU", ] -md_engines = ["gromacs", "hoomd", "lammps-VU", "lammps-UD"] +md_engines = ["gromacs", "hoomd", "lammps-VU"] mc_engines = ["cassandra", "mcccs", "gomc"] forcefields = {} r_cuts = {}