From 0bb439b8d38f8ab59977b47771dcc88fba1fe436 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:18:02 -0700 Subject: [PATCH 01/10] ensure depth is defined for the Root node --- pysidt/sidt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysidt/sidt.py b/pysidt/sidt.py index 3782c12..7f6fe3f 100644 --- a/pysidt/sidt.py +++ b/pysidt/sidt.py @@ -97,7 +97,7 @@ def __init__( node = node.parent self.root = node elif root_group: - self.root = Node(root_group, name="Root") + self.root = Node(root_group, name="Root", depth=0) self.nodes = {"Root": self.root} def load(self, nodes): From f83a35961d35c88f70d1a05cc59c41af377c5111 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:19:36 -0700 Subject: [PATCH 02/10] add function for dumping simple objects to dictionaries --- pysidt/sidt.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pysidt/sidt.py b/pysidt/sidt.py index 7f6fe3f..508c574 100644 --- a/pysidt/sidt.py +++ b/pysidt/sidt.py @@ -309,6 +309,33 @@ def evaluate(self, mol): return node.rule +def to_dict(obj): + out_dict = dict() + out_dict["class"] = obj.__class__.__name__ + attrs = [attr for attr in dir(obj) if not attr.startswith('_')] + for attr in attrs: + + val = getattr(obj, attr) + + if callable(val) or val == getattr(obj.__class__,attr): + continue + + try: + json.dumps(val) + out_dict[attr] = val + except: + if isinstance(val, ScalarQuantity): + out_dict[attr] = { + "class": val.__class__.__name__, + "value": val.value, + "units": val.units, + "uncertainty": val.uncertainty, + "uncertainty_type": val.uncertainty_type, + } + else: + out_dict[attr] = to_dict(val) + + return out_dict def write_nodes(tree, file): nodesdict = dict() From 0a3c5d62a3f32e0d709577886d13baa803b951c2 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:19:57 -0700 Subject: [PATCH 03/10] add function for constructing simple objects from dictionaries from_dict docstring --- pysidt/sidt.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pysidt/sidt.py b/pysidt/sidt.py index 508c574..0b6f3d2 100644 --- a/pysidt/sidt.py +++ b/pysidt/sidt.py @@ -337,6 +337,32 @@ def to_dict(obj): return out_dict + +def from_dict(d, class_dict=None): + """construct objects from dictionary + + Args: + d (dict): dictionary describing object, particularly containing a value + associated with "class" identifying a string of the class of the object + class_dict (dict, optional): dictionary mapping class strings to the class objects/constructors + + Returns: + object associated with dictionary + """ + if class_dict is None: + class_dict = dict() + + construct_d = dict() + for k, v in d.items(): + if k == "class": + continue + if isinstance(v,dict) and "class" in v.keys(): + construct_d[k] = from_dict(v, class_dict=class_dict) + else: + construct_d[k] = v + + return class_dict[d["class"]](**construct_d) + def write_nodes(tree, file): nodesdict = dict() for node in tree.nodes.values(): From 61dea0fa09af1ee9ad95255704c1a07fe49e69ba Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:20:32 -0700 Subject: [PATCH 04/10] improve write_nodes to handle simple objects write nodes amend --- pysidt/sidt.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pysidt/sidt.py b/pysidt/sidt.py index 0b6f3d2..48ae9c7 100644 --- a/pysidt/sidt.py +++ b/pysidt/sidt.py @@ -370,12 +370,26 @@ def write_nodes(tree, file): p = None else: p = node.parent.name + + try: + json.dumps(node.rule) + rule = node.rule + except (TypeError, OverflowError): + rule = to_dict(node.rule) #will work on all rmgmolecule objects, new objects need this method implemented + try: + json.dumps(rule) + except: + raise ValueError( + f"Could not serialize object {node.rule.__class__.__name__}" + ) + nodesdict[node.name] = { "group": node.group.to_adjacency_list(), - "rule": node.rule, + "rule": rule, "parent": p, "children": [x.name for x in node.children], "name": node.name, + "depth": node.depth } with open(file, "w") as f: From d19dda30160c5d5560dc7bcadc988b1950e67e3e Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:21:25 -0700 Subject: [PATCH 05/10] improve read_nodes to handle simple objects --- pysidt/sidt.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pysidt/sidt.py b/pysidt/sidt.py index 48ae9c7..72b168e 100644 --- a/pysidt/sidt.py +++ b/pysidt/sidt.py @@ -1,4 +1,5 @@ from molecule.molecule import Group +from molecule.quantity import ScalarQuantity from pysidt.extensions import split_mols, get_extension_edge from pysidt.regularization import simple_regularization from pysidt.decomposition import * @@ -396,7 +397,17 @@ def write_nodes(tree, file): json.dump(nodesdict, f) -def read_nodes(file): +def read_nodes(file, class_dict=dict()): + """_summary_ + + Args: + file (string): string of JSON file to laod + class_dict (dict): maps class names to classes for any non-JSON + serializable types that need constructed + + Returns: + nodes (list): list of nodes for tree + """ with open(file, "r") as f: nodesdict = json.load(f) nodes = dict() @@ -414,6 +425,8 @@ def read_nodes(file): if node.parent: node.parent = nodes[node.parent] node.children = [nodes[child] for child in node.children] + if isinstance(node.rule, dict) and "class" in node.rule.keys(): + node.rule = from_dict(node.rule, class_dict=class_dict) return nodes From d99a09d1ac87336205d1e99825acc0be7a45a9c9 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:21:56 -0700 Subject: [PATCH 06/10] change environment file to use rmgmolecule --- environment.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/environment.yml b/environment.yml index e552313..4a8d0bf 100644 --- a/environment.yml +++ b/environment.yml @@ -5,15 +5,12 @@ channels: - mjohnson541 - rmg dependencies: - - rdkit >=2020.03.3.0 - - cython >=0.25.2 - - conda-forge::openbabel >= 3 - - cairo - - cairocffi - - pyrdl + - python >=3.7,<3.10 + - jupyter - nose - - rmg::lpsolve55 - - rmg::quantities - - scipy + - numpy + - pydot + - mjohnson541::rmgmolecule >=0.3.0 - scikit-learn - - pydot \ No newline at end of file + - scipy + From 00dc2516d67e51598bbb0ada2681e2bdb5f693cb Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:26:42 -0700 Subject: [PATCH 07/10] add matplotlib to environment --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 4a8d0bf..c736894 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,7 @@ channels: dependencies: - python >=3.7,<3.10 - jupyter + - matplotlib - nose - numpy - pydot From 55ab89bda3baa4c9b075d1b23b0149bcb0b9a2f7 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 19 Mar 2024 18:28:51 -0700 Subject: [PATCH 08/10] add notebook for Surface Diffusion single eval SIDT example --- .github/workflows/CI.yml | 1 + ...e_Diffusion_single_eval_SIDT_example.ipynb | 468 ++++++++++++++++++ 2 files changed, 469 insertions(+) create mode 100644 IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 24a055e..027e9f1 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -47,3 +47,4 @@ jobs: - name: Test PySIDT notebooks run: | pytest --nbmake IPython/multi_eval_SIDT_example.ipynb + pytest --nbmake IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb diff --git a/IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb b/IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb new file mode 100644 index 0000000..bce5a6a --- /dev/null +++ b/IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb @@ -0,0 +1,468 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b57e048e", + "metadata": {}, + "outputs": [], + "source": [ + "from pysidt import Datum, SubgraphIsomorphicDecisionTree, write_nodes, read_nodes\n", + "from pysidt.extensions import split_mols\n", + "from pysidt.plotting import plot_tree\n", + "import json\n", + "import logging\n", + "from molecule.molecule import Molecule,Group\n", + "from molecule.molecule.atomtype import ATOMTYPES\n", + "from molecule.kinetics import SurfaceArrhenius\n", + "from molecule.kinetics.uncertainties import RateUncertainty\n", + "from molecule.quantity import ScalarQuantity\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "cf082bc8-733c-4cfc-b1dc-8fb1bcbcb62a", + "metadata": {}, + "source": [ + "Prepare Training Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfd59dd3", + "metadata": {}, + "outputs": [], + "source": [ + "#Surface diffusion mean field rate coefficients on Cu111 computed from Pynta\n", + "diff_dict = {\n", + " '1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}\\n2 H u0 p0 c0 {1,S}\\n3 H u0 p0 c0 {1,S}\\n4 H u0 p0 c0 {1,S}\\n5 *1 O u0 p2 c0 {1,S} {6,S}\\n6 *2 X u0 p0 c0 {5,S}\\n7 *3 X u0 p0 c0\\n': \n", + " SurfaceArrhenius(A=(2.2223e-07,'m^2/(molecule*s)'), n=0.0174829, Ea=(15.6635,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K')),\n", + " '1 N u0 p1 c0 {2,S} {3,S} {4,S}\\n2 H u0 p0 c0 {1,S}\\n3 H u0 p0 c0 {1,S}\\n4 C u0 p0 c0 {1,S} {5,S} {6,D}\\n5 H u0 p0 c0 {4,S}\\n6 *1 C u0 p0 c0 {4,D} {7,S} {8,S}\\n7 H u0 p0 c0 {6,S}\\n8 *2 X u0 p0 c0 {6,S}\\n9 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(5.13926e-10,'m^2/(molecule*s)'), n=1.01787, Ea=(10.2887,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00322, dn = +|- 0.000407601, dEa = +|- 0.00283132 kJ/mol\"\"\"),\n", + " '1 O u0 p2 c0 {2,D}\\n2 *1 C u0 p0 c0 {1,D} {3,D}\\n3 *2 X u0 p0 c0 {2,D}\\n4 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(2.78393e-08,'m^2/(molecule*s)'), n=0.0503798, Ea=(4.61439,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00513, dn = +|- 0.000648231, dEa = +|- 0.00450281 kJ/mol\"\"\"),\n", + " '1 *1 O u0 p2 c0 {2,D}\\n2 *2 X u0 p0 c0 {1,D}\\n3 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(4.36036e-07,'m^2/(molecule*s)'), n=0.0110633, Ea=(27.0082,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00512, dn = +|- 0.000646724, dEa = +|- 0.00449234 kJ/mol\"\"\"),\n", + " '1 *1 N u0 p1 c0 {2,T}\\n2 *2 X u0 p0 c0 {1,T}\\n3 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(5.22663e-07,'m^2/(molecule*s)'), n=0.0171004, Ea=(17.9096,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00831, dn = +|- 0.00104785, dEa = +|- 0.0072787 kJ/mol\"\"\"),\n", + " '1 *1 O u0 p2 c0 {2,S} {3,S}\\n2 H u0 p0 c0 {1,S}\\n3 *2 X u0 p0 c0 {1,S}\\n4 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(2.6936e-07,'m^2/(molecule*s)'), n=-0.024542, Ea=(13.3305,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00955, dn = +|- 0.00120318, dEa = +|- 0.00835765 kJ/mol\"\"\"),\n", + " '1 *1 N u0 p1 c0 {2,S} {3,D}\\n2 H u0 p0 c0 {1,S}\\n3 *2 X u0 p0 c0 {1,D}\\n4 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(4.69378e-07,'m^2/(molecule*s)'), n=0.00356855, Ea=(22.6721,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00213, dn = +|- 0.000270019, dEa = +|- 0.00187564 kJ/mol\"\"\"),\n", + " '1 O u0 p2 c0 {2,D}\\n2 *1 C u0 p0 c0 {1,D} {3,S} {4,S}\\n3 H u0 p0 c0 {2,S}\\n4 *2 X u0 p0 c0 {2,S}\\n5 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(1.18904e-07,'m^2/(molecule*s)'), n=0.0763068, Ea=(1.53955,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.01967, dn = +|- 0.00246664, dEa = +|- 0.017134 kJ/mol\"\"\"),\n", + " '1 *1 H u0 p0 c0 {2,S}\\n2 *2 X u0 p0 c0 {1,S}\\n3 *3 X u0 p0 c0\\n':\n", + " SurfaceArrhenius(A=(8.66e-07,'m^2/(molecule*s)'), n=0.019382, Ea=(12.4395,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.02604, dn = +|- 0.00325525, dEa = +|- 0.022612 kJ/mol\"\"\"), \n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10facbba", + "metadata": {}, + "outputs": [], + "source": [ + "len(diff_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c5833c7", + "metadata": {}, + "outputs": [], + "source": [ + "training_data = []\n", + "for adjlist,kin in diff_dict.items():\n", + " d = Datum(Molecule().from_adjacency_list(adjlist), kin)\n", + " training_data.append(d)" + ] + }, + { + "cell_type": "markdown", + "id": "f92c02e1-2985-4160-8c62-897914e1dd9a", + "metadata": {}, + "source": [ + "Class for Surface Diffusion Estimation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94bf9f49", + "metadata": {}, + "outputs": [], + "source": [ + "class SurfaceDiffusionEstimator(SubgraphIsomorphicDecisionTree):\n", + " \n", + " def choose_extension(self, node, exts):\n", + " \"\"\"\n", + " select best extension among the set of extensions\n", + " returns a Node object\n", + " almost always subclassed\n", + " \"\"\"\n", + " Tref = 1000.0\n", + " minval = np.inf\n", + " minext = None\n", + " for ext in exts:\n", + " new,comp = split_mols(node.items, ext)\n", + " val = np.std([np.log(x.value.get_rate_coefficient(T=Tref)) for x in new])*len(new) + np.std([np.log(x.value.get_rate_coefficient(T=Tref)) for x in comp])*len(comp)\n", + " if val < minval:\n", + " minval = val \n", + " minext = ext \n", + " \n", + " return minext\n", + " \n", + " def fit_tree(self, data=None):\n", + " \"\"\"\n", + " fit rule for each node\n", + " \"\"\"\n", + " Tref = 1000.0\n", + " fmax = 1.0e5\n", + " if data:\n", + " self.clear_data()\n", + " self.root.items = data[:]\n", + " self.descend_training_from_top(only_specific_match=False)\n", + " \n", + " for node in self.nodes.values():\n", + " if len(node.items) == 0:\n", + " logging.error(node.name)\n", + " raise ValueError\n", + " \n", + " node.rule = average_kinetics(node.items)\n", + " \n", + " data_mean = np.mean(np.log([k.value.get_rate_coefficient(Tref) for k in node.items]))\n", + " n = len(node.items)\n", + " \n", + " if len(node.items) == 1:\n", + " node.rule.uncertainty = RateUncertainty(mu=0.0, var=(np.log(fmax) / 2.0) ** 2, N=1, Tref=Tref, data_mean=data_mean, correlation=node.name)\n", + " node.rule.comment = f\"Only one reaction rate\"\n", + " else:\n", + " dlnks = np.array([\n", + " np.log(average_kinetics([node.items[k] for k in list(set(range(len(node.items)))-{i})]).get_rate_coefficient(Tref) / \n", + " node.items[i].value.get_rate_coefficient(Tref)) for i in range(len(node.items))\n", + " ])\n", + " mu = np.mean(dlnks)\n", + " s = np.std(dlnks)\n", + " node.rule.uncertainty = RateUncertainty(mu=mu, var=s ** 2, N=n, Tref=Tref, data_mean=data_mean, correlation=node.name)\n", + " \n", + " def evaluate(self, mol):\n", + " \"\"\"\n", + " Evaluate tree for a given possibly labeled mol\n", + " \"\"\"\n", + " children = self.root.children \n", + " node = self.root\n", + " \n", + " while children != []:\n", + " for child in children:\n", + " if mol.is_subgraph_isomorphic(child.group, generate_initial_map=True, save_order=True):\n", + " children = child.children \n", + " node = child\n", + " break\n", + " else:\n", + " break\n", + "\n", + " while node.parent is not None:\n", + " err_parent = abs(node.parent.rule.uncertainty.data_mean + node.parent.rule.uncertainty.mu - node.parent.rule.uncertainty.data_mean) + np.sqrt(2.0*node.parent.rule.uncertainty.var/np.pi)\n", + " err_entry = abs(node.rule.uncertainty.mu) + np.sqrt(2.0*node.rule.uncertainty.var/np.pi)\n", + " if err_entry <= err_parent:\n", + " break\n", + " else:\n", + " node = node.parent\n", + " \n", + " return node.rule\n", + "\n", + "def average_kinetics(items):\n", + " Aunits = items[0].value.A.units\n", + " \n", + " if Aunits in {'cm^3/(mol*s)', 'cm^3/(molecule*s)', 'm^3/(molecule*s)'}:\n", + " Aunits = 'm^3/(mol*s)'\n", + " elif Aunits in {'cm^6/(mol^2*s)', 'cm^6/(molecule^2*s)', 'm^6/(molecule^2*s)'}:\n", + " Aunits = 'm^6/(mol^2*s)'\n", + " elif Aunits in {'s^-1', 'm^3/(mol*s)', 'm^6/(mol^2*s)'}:\n", + " # they were already in SI\n", + " pass\n", + " elif Aunits in {'m^2/(mol*s)', 'cm^2/(mol*s)', 'm^2/(molecule*s)', 'cm^2/(molecule*s)'}:\n", + " # surface: bimolecular (Langmuir-Hinshelwood)\n", + " Aunits = 'm^2/(mol*s)'\n", + " elif Aunits in {'m^5/(mol^2*s)', 'cm^5/(mol^2*s)', 'm^5/(molecule^2*s)', 'cm^5/(molecule^2*s)'}:\n", + " # surface: dissociative adsorption\n", + " Aunits = 'm^5/(mol^2*s)'\n", + " elif Aunits == '':\n", + " # surface: sticking coefficient\n", + " pass\n", + " else:\n", + " raise ValueError(f'Invalid units {Aunits} for averaging kinetics.')\n", + " \n", + " A = np.exp(sum(np.log(d.value.A.value_si) for d in items)/len(items))\n", + " n = sum(d.value.n.value_si for d in items)/len(items)\n", + " Ea = sum(d.value.Ea.value_si for d in items)/len(items)\n", + " return SurfaceArrhenius(A=(A,Aunits), n=n, Ea=(Ea,\"J/mol\"))" + ] + }, + { + "cell_type": "markdown", + "id": "0e3fc001-98ca-4da0-a659-bd5239404fe1", + "metadata": {}, + "source": [ + "Generate Tree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6f7d6f6", + "metadata": {}, + "outputs": [], + "source": [ + "root = Group().from_adjacency_list('1 *1 R u0 px cx {2,[S,D,T,Q]}\\n2 *2 X u0 p0 c0 {1,[S,D,T,Q]}\\n3 *3 X u0 p0 c0\\n')\n", + "sidt = SurfaceDiffusionEstimator(root_group=root,\n", + " r=[ATOMTYPES[x] for x in [\"X\",\"H\",\"C\",\"O\",\"N\"]], r_bonds=[1,2,3,1.5,4,0.0],\n", + " r_un=[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91374aac", + "metadata": {}, + "outputs": [], + "source": [ + "sidt.generate_tree(data=training_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82bab66c", + "metadata": {}, + "outputs": [], + "source": [ + "sidt.fit_tree(data=training_data)" + ] + }, + { + "cell_type": "markdown", + "id": "8a12b7fc-eb9f-4fea-a0fd-4747b0a5b495", + "metadata": {}, + "source": [ + "Visualize Tree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5923f619", + "metadata": {}, + "outputs": [], + "source": [ + "plot_tree(sidt, images=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cfe8492", + "metadata": {}, + "outputs": [], + "source": [ + "plot_tree(sidt, images=False)" + ] + }, + { + "cell_type": "markdown", + "id": "f370a185-5654-4f34-83e4-821ad783346b", + "metadata": {}, + "source": [ + "Tree Evaluation Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a245c12e", + "metadata": {}, + "outputs": [], + "source": [ + "m = Molecule().from_adjacency_list(\"\"\"\n", + "1 H u0 p0 c0 {2,S}\n", + "2 O u0 p2 c0 {1,S} {3,S}\n", + "3 *1 O u0 p2 c0 {2,S} {4,S}\n", + "4 *2 X u0 p0 c0 {3,S}\n", + "5 *3 X u0 p0 c0\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3dcfbca", + "metadata": {}, + "outputs": [], + "source": [ + "out = sidt.evaluate(m)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c757052", + "metadata": {}, + "outputs": [], + "source": [ + "out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a4cb9c1", + "metadata": {}, + "outputs": [], + "source": [ + "out.uncertainty" + ] + }, + { + "cell_type": "markdown", + "id": "e881b090-d084-408c-9fdf-4bc8b60ce91b", + "metadata": {}, + "source": [ + "Analyze Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f9c597b-6e1b-4487-b95f-caab91dee454", + "metadata": {}, + "outputs": [], + "source": [ + "def evaluate_one_node_up(sidt, mol):\n", + " \"\"\"\n", + " Evaluate tree except that if the node was defined by a single reaction move one node up\n", + " \"\"\"\n", + " children = sidt.root.children \n", + " node = sidt.root\n", + " \n", + " while children != []:\n", + " for child in children:\n", + " if mol.is_subgraph_isomorphic(child.group, generate_initial_map=True, save_order=True):\n", + " children = child.children \n", + " node = child\n", + " break\n", + " else:\n", + " break\n", + "\n", + " while node.parent is not None:\n", + " err_parent = abs(node.parent.rule.uncertainty.data_mean + node.parent.rule.uncertainty.mu - node.parent.rule.uncertainty.data_mean) + np.sqrt(2.0*node.parent.rule.uncertainty.var/np.pi)\n", + " err_entry = abs(node.rule.uncertainty.mu) + np.sqrt(2.0*node.rule.uncertainty.var/np.pi)\n", + " if err_entry <= err_parent:\n", + " break\n", + " else:\n", + " node = node.parent\n", + " \n", + " if node.children:\n", + " return node.rule\n", + " else:\n", + " return node.parent.rule" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "baf1d4fb-217c-44b5-b5c0-b223aea9dcd3", + "metadata": {}, + "outputs": [], + "source": [ + "err = [np.abs(np.log(evaluate_one_node_up(sidt,Molecule().from_adjacency_list(st)).get_rate_coefficient(800.0)/diff.get_rate_coefficient(800.0))) for st,diff in diff_dict.items()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bf5ade6-2c42-4594-82b3-112b685f29ff", + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(err,bins=5, density=True)\n", + "plt.ylabel(\"Density\")\n", + "plt.xlabel(\"$\\Delta Log(k)$ in Diffusion Rate Coefficient Prediction at 800 K\")" + ] + }, + { + "cell_type": "markdown", + "id": "594ca4c1-74b9-4d2e-9297-628cef5d1bc4", + "metadata": {}, + "source": [ + "Save and Load Tree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64e7d327-80ab-44e9-833f-a1b2810df86e", + "metadata": {}, + "outputs": [], + "source": [ + "file = \"Surface_Diffusion_tree.json\"\n", + "write_nodes(sidt, file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4cee908-fe69-46f5-a347-9947344f6a53", + "metadata": {}, + "outputs": [], + "source": [ + "nodes = read_nodes(file, class_dict={\"SurfaceArrhenius\": SurfaceArrhenius, \"RateUncertainty\": RateUncertainty, \"ScalarQuantity\": ScalarQuantity})\n", + "sidt_loaded = SurfaceDiffusionEstimator(nodes=nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b892557-7f1e-4deb-a558-8bc57f341441", + "metadata": {}, + "outputs": [], + "source": [ + "sidt_loaded.evaluate(m)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89e1d7b0-b277-4365-bd43-8958dc4a7396", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 714bb24b4479ea1cf56b17c81088846fbada4d1b Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 20 Mar 2024 12:17:09 -0700 Subject: [PATCH 09/10] remove source install of rmgmolecule in CI tests --- .github/workflows/CI.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 027e9f1..894407f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -17,11 +17,6 @@ jobs: - name: Checkout PySIDT uses: actions/checkout@v4 - - name: Clone ReactionMechanismGenerator/molecule - run: | - cd .. - git clone -b ts_len_improvements https://github.com/ReactionMechanismGenerator/molecule.git - - name: Create pysidt environment uses: conda-incubator/setup-miniconda@v3 with: @@ -37,13 +32,6 @@ jobs: python -m pip install pytest nbmake python -m pip install -e . - - name: Make and install molecule - run: | - cd ../molecule - make clean - make - pip install -e . - - name: Test PySIDT notebooks run: | pytest --nbmake IPython/multi_eval_SIDT_example.ipynb From 40dad62b5b70ca957e4901d4deb1ed3ff93b7c31 Mon Sep 17 00:00:00 2001 From: Hao-Wei Pang Date: Thu, 21 Mar 2024 09:29:15 -0400 Subject: [PATCH 10/10] Formatting --- pysidt/sidt.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pysidt/sidt.py b/pysidt/sidt.py index 72b168e..18c5ba2 100644 --- a/pysidt/sidt.py +++ b/pysidt/sidt.py @@ -310,17 +310,17 @@ def evaluate(self, mol): return node.rule + def to_dict(obj): out_dict = dict() out_dict["class"] = obj.__class__.__name__ - attrs = [attr for attr in dir(obj) if not attr.startswith('_')] + attrs = [attr for attr in dir(obj) if not attr.startswith("_")] for attr in attrs: - val = getattr(obj, attr) - - if callable(val) or val == getattr(obj.__class__,attr): + + if callable(val) or val == getattr(obj.__class__, attr): continue - + try: json.dumps(val) out_dict[attr] = val @@ -335,7 +335,7 @@ def to_dict(obj): } else: out_dict[attr] = to_dict(val) - + return out_dict @@ -357,13 +357,14 @@ def from_dict(d, class_dict=None): for k, v in d.items(): if k == "class": continue - if isinstance(v,dict) and "class" in v.keys(): + if isinstance(v, dict) and "class" in v.keys(): construct_d[k] = from_dict(v, class_dict=class_dict) else: construct_d[k] = v - + return class_dict[d["class"]](**construct_d) + def write_nodes(tree, file): nodesdict = dict() for node in tree.nodes.values(): @@ -371,12 +372,14 @@ def write_nodes(tree, file): p = None else: p = node.parent.name - + try: json.dumps(node.rule) rule = node.rule except (TypeError, OverflowError): - rule = to_dict(node.rule) #will work on all rmgmolecule objects, new objects need this method implemented + rule = to_dict( + node.rule + ) # will work on all rmgmolecule objects, new objects need this method implemented try: json.dumps(rule) except: @@ -390,7 +393,7 @@ def write_nodes(tree, file): "parent": p, "children": [x.name for x in node.children], "name": node.name, - "depth": node.depth + "depth": node.depth, } with open(file, "w") as f: @@ -402,7 +405,7 @@ def read_nodes(file, class_dict=dict()): Args: file (string): string of JSON file to laod - class_dict (dict): maps class names to classes for any non-JSON + class_dict (dict): maps class names to classes for any non-JSON serializable types that need constructed Returns: