diff --git a/deepdrivemd/api.py b/deepdrivemd/api.py index 7f40311..4967d1b 100644 --- a/deepdrivemd/api.py +++ b/deepdrivemd/api.py @@ -183,7 +183,8 @@ def workdir(self) -> Path: if self.config.node_local_path is None else self.config.node_local_path ) - workdir = workdir_parent / f"run-{uuid.uuid4()}" + timestamp = datetime.now().strftime("%d%m%y-%H%M%S") + workdir = workdir_parent / f"run-{timestamp}-{uuid.uuid4()}" workdir.mkdir(exist_ok=True, parents=True) self.__workdir = workdir return workdir diff --git a/deepdrivemd/apps/cvae_inference/__init__.py b/deepdrivemd/apps/cvae_inference/__init__.py index 275b90c..45a20d2 100644 --- a/deepdrivemd/apps/cvae_inference/__init__.py +++ b/deepdrivemd/apps/cvae_inference/__init__.py @@ -35,6 +35,7 @@ class CVAEInferenceSettings(ApplicationSettings): """The number of cores to use for sklearn LOF method.""" num_outliers: int = 120 """The number of latent space outliers to consider when picking the minimal RMSD structures.""" + use_target: bool = True # validators _cvae_settings_yaml = path_validator("cvae_settings_yaml") diff --git a/deepdrivemd/apps/cvae_inference/app.py b/deepdrivemd/apps/cvae_inference/app.py index 6b2d828..37b357b 100644 --- a/deepdrivemd/apps/cvae_inference/app.py +++ b/deepdrivemd/apps/cvae_inference/app.py @@ -70,8 +70,9 @@ def run(self, input_data: CVAEInferenceInput) -> CVAEInferenceOutput: ) .sort_values("lof") # First sort by lof score .head(self.config.num_outliers) # Take the smallest num_outliers lof scores - .sort_values("rmsd") # Finally, sort the smallest lof scores by rmsd ) + if self.config.use_target: + df = df.sort_values("rmsd") # Finally, sort the smallest lof scores by rmsd df.to_csv(self.workdir / "outliers.csv") diff --git a/deepdrivemd/apps/cvae_train/__init__.py b/deepdrivemd/apps/cvae_train/__init__.py index b6d315f..2aeee01 100644 --- a/deepdrivemd/apps/cvae_train/__init__.py +++ b/deepdrivemd/apps/cvae_train/__init__.py @@ -30,7 +30,7 @@ class CVAESettings(BaseSettings): latent_dim: int = 10 lambda_rec: float = 1.0 num_data_workers: int = 0 - prefetch_factor: int = 2 + prefetch_factor: Optional[int] = 2 batch_size: int = 64 device: str = "cuda" optimizer_name: str = "RMSprop" diff --git a/deepdrivemd/apps/openmm_simulation/__init__.py b/deepdrivemd/apps/openmm_simulation/__init__.py index 225785a..d4ae398 100644 --- a/deepdrivemd/apps/openmm_simulation/__init__.py +++ b/deepdrivemd/apps/openmm_simulation/__init__.py @@ -19,14 +19,18 @@ class MDSimulationSettings(ApplicationSettings): simulation_length_ns: float = 10 report_interval_ps: float = 50 dt_ps: float = 0.002 - temperature_kelvin: float = 310.0 + hydrogen_mass: float = 1.0 + temperature_kelvin: float = 300.0 heat_bath_friction_coef: float = 1.0 - rmsd_reference_pdb: Path + rmsd_reference_pdb: Optional[Path] = None """Reference PDB file to compute RMSD to each frame.""" mda_selection: str = "protein and name CA" """MDAnalysis selection to run contact map and RMSD analysis on.""" cutoff_angstrom: float = 8.0 """Atoms within this cutoff are said to be in contact.""" + explicit_barostat: Optional[str] = None + """barostat type, none means NVT""" + run_minimization: bool = False # validators _rmsd_reference_pdb_exists = path_validator("rmsd_reference_pdb") diff --git a/deepdrivemd/apps/openmm_simulation/app.py b/deepdrivemd/apps/openmm_simulation/app.py index 64c93e1..3860b63 100644 --- a/deepdrivemd/apps/openmm_simulation/app.py +++ b/deepdrivemd/apps/openmm_simulation/app.py @@ -13,6 +13,7 @@ except ImportError: pass # For testing purposes +import parmed as pmd from MDAnalysis.analysis import align, distances, rms from deepdrivemd.api import Application, PathLike @@ -44,10 +45,10 @@ def _configure_amber_implicit( ) else: pdb = app.PDBFile(str(pdb_file)) - top = pdb.topology + top = pdb forcefield = app.ForceField("amber14-all.xml", "implicit/gbn2.xml") system = forcefield.createSystem( - top, + top.topology, nonbondedMethod=app.CutoffNonPeriodic, nonbondedCutoff=1.0 * u.nanometer, constraints=app.HBonds, @@ -61,7 +62,9 @@ def _configure_amber_implicit( ) integrator.setConstraintTolerance(0.00001) - sim = app.Simulation(top, system, integrator, platform, platform_properties) + sim = app.Simulation( + top.topology, system, integrator, platform, platform_properties + ) # Returning the pdb file object for later use to reduce I/O. # If a topology file is passed, the pdb variable is None. @@ -69,20 +72,34 @@ def _configure_amber_implicit( def _configure_amber_explicit( + pdb_file: PathLike, top_file: PathLike, dt_ps: float, + hydrogen_mass: float, temperature_kelvin: float, heat_bath_friction_coef: float, platform: "openmm.Platform", platform_properties: Dict[str, str], explicit_barostat: str, ) -> "app.Simulation": - top = app.AmberPrmtopFile(str(top_file)) - system = top.createSystem( - nonbondedMethod=app.PME, - nonbondedCutoff=1.0 * u.nanometer, - constraints=app.HBonds, - ) + top = pmd.load_file(str(top_file), xyz=str(pdb_file)) + if dt_ps > 0.002: + """ + https://github.com/openmm/openmm/issues/3117#issuecomment-841492272 + https://github.com/openmm/openmm/issues/2520 + """ + system = top.createSystem( + nonbondedMethod=app.PME, + nonbondedCutoff=1.0 * u.nanometer, + constraints=app.HBonds, + hydrogenMass=hydrogen_mass * u.amu, + ) + else: + system = top.createSystem( + nonbondedMethod=app.PME, + nonbondedCutoff=1.0 * u.nanometer, + constraints=app.HBonds, + ) # Congfigure integrator integrator = openmm.LangevinIntegrator( @@ -101,7 +118,7 @@ def _configure_amber_explicit( (1, 1, 1) * u.bar, temperature_kelvin * u.kelvin, False, False, True ) ) - else: + elif isinstance(explicit_barostat, str): raise ValueError(f"Invalid explicit_barostat option: {explicit_barostat}") sim = app.Simulation( @@ -121,6 +138,7 @@ def configure_simulation( solvent_type: str, gpu_index: int, dt_ps: float, + hydrogen_mass: float, temperature_kelvin: float, heat_bath_friction_coef: float, explicit_barostat: str = "MonteCarloBarostat", @@ -189,8 +207,10 @@ def configure_simulation( assert top_file is not None pdb = None sim = _configure_amber_explicit( + pdb_file, top_file, dt_ps, + hydrogen_mass, temperature_kelvin, heat_bath_friction_coef, platform, @@ -230,17 +250,20 @@ def copy_topology(self, directory: Path) -> Optional[Path]: """Scan directory for optional topology file (assumes topology file is in the same directory as the PDB file and that only one PDB/topology file exists in each directory.)""" - top_file = next(directory.glob("*.top"), None) - if top_file is None: - top_file = next(directory.glob("*.prmtop"), None) + top_file = next(directory.glob("*.prmtop"), None) if top_file is not None: top_file = self.copy_to_workdir(top_file) + return top_file def generate_restart_pdb(self, sim_dir: Path, frame: int) -> Path: """Generate a new PDB from a given `frame` of a previous simulation.""" old_pdb_file = next(sim_dir.glob("*.pdb")) - dcd_file = next(sim_dir.glob("*.dcd")) + try: + dcd_file = next(sim_dir.glob("*.dcd")) + except: + dcd_file = next(sim_dir.glob("*.xtc")) + # New pdb file to write, example: workdir/run-<uuid>_frame000000.pdb pdb_file = self.workdir / f"{old_pdb_file.parent.name}_frame{frame:06}.pdb" mda_u = MDAnalysis.Universe(str(old_pdb_file), str(dcd_file)) @@ -251,11 +274,13 @@ def generate_restart_pdb(self, sim_dir: Path, frame: int) -> Path: def run(self, input_data: MDSimulationInput) -> MDSimulationOutput: # Log the input data input_data.dump_yaml(self.workdir / "input.yaml") + run_minimization = self.config.run_minimization if input_data.sim_frame is None: # No restart point, starting from initial PDB pdb_file = next(input_data.sim_dir.glob("*.pdb")) pdb_file = self.copy_to_workdir(pdb_file) + run_minimization = True assert pdb_file is not None else: # Collect PDB, DCD, and topology files from previous simulation @@ -273,8 +298,11 @@ def run(self, input_data: MDSimulationInput) -> MDSimulationOutput: solvent_type=self.config.solvent_type, gpu_index=0, dt_ps=self.config.dt_ps, + hydrogen_mass=self.config.hydrogen_mass, temperature_kelvin=self.config.temperature_kelvin, heat_bath_friction_coef=self.config.heat_bath_friction_coef, + explicit_barostat=self.config.explicit_barostat, + run_minimization=run_minimization, ) # openmm typed variables @@ -288,8 +316,13 @@ def run(self, input_data: MDSimulationInput) -> MDSimulationOutput: nsteps = int(simulation_length_ns / dt_ps) # Set up reporters to write simulation trajectory file and logs - traj_file = self.workdir / "sim.dcd" - sim.reporters.append(app.DCDReporter(traj_file, report_steps)) + if int(openmm.__version__[0]) > 7: + traj_file = str(self.workdir / "sim.xtc") + sim.reporters.append(app.XTCReporter(traj_file, report_steps)) + else: + traj_file = str(self.workdir / "sim.dcd") + sim.reporters.append(app.DCDReporter(traj_file, report_steps)) + sim.reporters.append( app.StateDataReporter( str(self.workdir / "sim.log"), @@ -333,7 +366,11 @@ def analyze_simulation( # Compute contact maps, rmsd, etc in bulk mda_u = MDAnalysis.Universe(str(pdb_file), str(traj_file)) - ref_u = MDAnalysis.Universe(str(self.config.rmsd_reference_pdb)) + if self.config.rmsd_reference_pdb is None: + ref_pdb = pdb_file + else: + ref_pdb = self.config.rmsd_reference_pdb + ref_u = MDAnalysis.Universe(str(ref_pdb)) # Align trajectory to compute accurate RMSD align.AlignTraj( mda_u, ref_u, select=self.config.mda_selection, in_memory=True diff --git a/requirements/env.yml b/requirements/env.yml new file mode 100644 index 0000000..6b1bcd8 --- /dev/null +++ b/requirements/env.yml @@ -0,0 +1,129 @@ +name: ddmd2 +channels: + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - bzip2=1.0.8 + - ca-certificates=2024.8.30 + - certifi=2024.8.30 + - cudatoolkit=11.8.0 + - ld_impl_linux-64=2.40 + - libblas=3.9.0 + - libcblas=3.9.0 + - libffi=3.4.4 + - libgcc=14.1.0 + - libgcc-ng=14.1.0 + - libgfortran=14.1.0 + - libgfortran-ng=14.1.0 + - libgfortran5=14.1.0 + - liblapack=3.9.0 + - libopenblas=0.3.27 + - libstdcxx=14.1.0 + - libstdcxx-ng=14.1.0 + - libuuid=1.41.5 + - llvm-openmp=14.0.6 + - ncurses=6.4 + - numpy=1.26.4 + - ocl-icd=2.3.2 + - ocl-icd-system=1.0.0 + - openmm=8.1.2 + - openssl=3.3.2 + - pip=24.2 + - python=3.10.14 + - python_abi=3.10 + - readline=8.2 + - scipy=1.14.1 + - setuptools=75.1.0 + - sqlite=3.45.3 + - tk=8.6.14 + - wheel=0.44.0 + - xz=5.4.6 + - zlib=1.2.13 + - pip: + - bcrypt==4.2.0 + - cffi==1.17.1 + - charset-normalizer==3.3.2 + - cloudpickle==3.0.0 + - colmena==0.4.5 + - contourpy==1.3.0 + - cryptography==43.0.1 + - cycler==0.12.1 + - deepdrivemd==0.0.2a1 + - dill==0.3.8 + - fasteners==0.19 + - filelock==3.16.1 + - fonttools==4.54.1 + - fsspec==2024.9.0 + - globus-sdk==3.45.0 + - griddataformats==1.0.2 + - h5py==3.11.0 + - idna==3.10 + - jinja2==3.1.4 + - joblib==1.4.2 + - kiwisolver==1.4.7 + - lazy-object-proxy==1.10.0 + - markupsafe==2.1.5 + - matplotlib==3.9.2 + - mda-xdrlib==0.2.0 + - mdanalysis==2.7.0 + - mdlearn==0.0.10a1 + - mmtf-python==1.1.3 + - mpmath==1.3.0 + - mrcfile==1.5.3 + - msgpack==1.1.0 + - natsort==8.4.0 + - networkx==3.3 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==9.1.0.70 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.20.5 + - nvidia-nvjitlink-cu12==12.6.68 + - nvidia-nvtx-cu12==12.1.105 + - packaging==24.1 + - pandas==2.2.3 + - paramiko==3.5.0 + - parmed==4.2.2 + - parsl==2023.3.13 + - pillow==10.4.0 + - plotly==5.24.1 + - proxystore==0.4.1 + - psutil==6.0.0 + - pycparser==2.22 + - pydantic==1.10.9 + - pyjwt==2.9.0 + - pynacl==1.5.0 + - pyparsing==3.1.4 + - python-dateutil==2.9.0.post0 + - pytz==2024.2 + - pyyaml==6.0.2 + - pyzmq==26.2.0 + - redis==3.4.1 + - requests==2.32.3 + - scikit-learn==1.5.2 + - setproctitle==1.3.3 + - six==1.16.0 + - sympy==1.13.3 + - tblib==3.0.0 + - tenacity==9.0.0 + - threadpoolctl==3.5.0 + - torch==2.4.1 + - torchaudio==2.4.1+cu118 + - torchvision==0.19.1+cu118 + - tqdm==4.66.5 + - triton==3.0.0 + - typeguard==4.3.0 + - types-paramiko==3.5.0.20240918 + - types-requests==2.32.0.20240914 + - types-six==1.16.21.20240513 + - typing-extensions==4.12.2 + - tzdata==2024.2 + - urllib3==2.2.3 +prefix: /homes/heng.ma/miniconda3/envs/ddmd2 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c2d9e26..3803320 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ parsl==2023.3.13 -pydantic==1.10.6 +pydantic==1.10.9 pyyaml==6.0 colmena==0.4.5 MDAnalysis==2.3.0 diff --git a/setup.cfg b/setup.cfg index f25aec3..ce3efbf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,15 +12,16 @@ author_email = ramanathana@anl.gov [options] packages = find: install_requires = - pydantic==1.10.6 - pyyaml==6.0 + pydantic==1.10.9 + pyyaml colmena==0.4.5 parsl==2023.3.13 - MDAnalysis==2.3.0 + MDAnalysis mdlearn==0.0.10a1 - scikit-learn==1.1.3 + scikit-learn torch natsort + parmed python_requires = >=3.8 @@ -67,4 +68,4 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-mdlearn.*] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True