From 0bb439b8d38f8ab59977b47771dcc88fba1fe436 Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:18:02 -0700
Subject: [PATCH 01/10] ensure depth is defined for the Root node

---
 pysidt/sidt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysidt/sidt.py b/pysidt/sidt.py
index 3782c12..7f6fe3f 100644
--- a/pysidt/sidt.py
+++ b/pysidt/sidt.py
@@ -97,7 +97,7 @@ def __init__(
                 node = node.parent
             self.root = node
         elif root_group:
-            self.root = Node(root_group, name="Root")
+            self.root = Node(root_group, name="Root", depth=0)
             self.nodes = {"Root": self.root}
 
     def load(self, nodes):

From f83a35961d35c88f70d1a05cc59c41af377c5111 Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:19:36 -0700
Subject: [PATCH 02/10] add function for dumping simple objects to dictionaries

---
 pysidt/sidt.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/pysidt/sidt.py b/pysidt/sidt.py
index 7f6fe3f..508c574 100644
--- a/pysidt/sidt.py
+++ b/pysidt/sidt.py
@@ -309,6 +309,33 @@ def evaluate(self, mol):
 
         return node.rule
 
+def to_dict(obj):
+    out_dict = dict()
+    out_dict["class"] = obj.__class__.__name__
+    attrs = [attr for attr in dir(obj) if not attr.startswith('_')]
+    for attr in attrs:
+        
+        val = getattr(obj, attr)
+        
+        if callable(val) or val == getattr(obj.__class__,attr):
+            continue
+        
+        try:
+            json.dumps(val)
+            out_dict[attr] = val
+        except:
+            if isinstance(val, ScalarQuantity):
+                out_dict[attr] = {
+                    "class": val.__class__.__name__,
+                    "value": val.value,
+                    "units": val.units,
+                    "uncertainty": val.uncertainty,
+                    "uncertainty_type": val.uncertainty_type,
+                }
+            else:
+                out_dict[attr] = to_dict(val)
+    
+    return out_dict
 
 def write_nodes(tree, file):
     nodesdict = dict()

From 0a3c5d62a3f32e0d709577886d13baa803b951c2 Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:19:57 -0700
Subject: [PATCH 03/10] add function for constructing simple objects from
 dictionaries

from_dict docstring
---
 pysidt/sidt.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pysidt/sidt.py b/pysidt/sidt.py
index 508c574..0b6f3d2 100644
--- a/pysidt/sidt.py
+++ b/pysidt/sidt.py
@@ -337,6 +337,32 @@ def to_dict(obj):
     
     return out_dict
 
+
+def from_dict(d, class_dict=None):
+    """construct objects from dictionary
+    
+    Args:
+        d (dict): dictionary describing object, particularly containing a value
+                associated with "class" identifying a string of the class of the object
+        class_dict (dict, optional): dictionary mapping class strings to the class objects/constructors
+
+    Returns:
+        object associated with dictionary
+    """
+    if class_dict is None:
+        class_dict = dict()
+
+    construct_d = dict()
+    for k, v in d.items():
+        if k == "class":
+            continue
+        if isinstance(v,dict) and "class" in v.keys():
+            construct_d[k] = from_dict(v, class_dict=class_dict)
+        else:
+            construct_d[k] = v
+            
+    return class_dict[d["class"]](**construct_d)
+
 def write_nodes(tree, file):
     nodesdict = dict()
     for node in tree.nodes.values():

From 61dea0fa09af1ee9ad95255704c1a07fe49e69ba Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:20:32 -0700
Subject: [PATCH 04/10] improve write_nodes to handle simple objects

write nodes amend
---
 pysidt/sidt.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/pysidt/sidt.py b/pysidt/sidt.py
index 0b6f3d2..48ae9c7 100644
--- a/pysidt/sidt.py
+++ b/pysidt/sidt.py
@@ -370,12 +370,26 @@ def write_nodes(tree, file):
             p = None
         else:
             p = node.parent.name
+            
+        try:
+            json.dumps(node.rule)
+            rule = node.rule
+        except (TypeError, OverflowError):
+            rule = to_dict(node.rule) #will work on all rmgmolecule objects, new objects need this method implemented
+            try:
+                json.dumps(rule)
+            except:
+                raise ValueError(
+                    f"Could not serialize object {node.rule.__class__.__name__}"
+                )
+
         nodesdict[node.name] = {
             "group": node.group.to_adjacency_list(),
-            "rule": node.rule,
+            "rule": rule,
             "parent": p,
             "children": [x.name for x in node.children],
             "name": node.name,
+            "depth": node.depth
         }
 
     with open(file, "w") as f:

From d19dda30160c5d5560dc7bcadc988b1950e67e3e Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:21:25 -0700
Subject: [PATCH 05/10] improve read_nodes to handle simple objects

---
 pysidt/sidt.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/pysidt/sidt.py b/pysidt/sidt.py
index 48ae9c7..72b168e 100644
--- a/pysidt/sidt.py
+++ b/pysidt/sidt.py
@@ -1,4 +1,5 @@
 from molecule.molecule import Group
+from molecule.quantity import ScalarQuantity
 from pysidt.extensions import split_mols, get_extension_edge
 from pysidt.regularization import simple_regularization
 from pysidt.decomposition import *
@@ -396,7 +397,17 @@ def write_nodes(tree, file):
         json.dump(nodesdict, f)
 
 
-def read_nodes(file):
+def read_nodes(file, class_dict=dict()):
+    """_summary_
+
+    Args:
+        file (string): string of JSON file to laod
+        class_dict (dict): maps class names to classes for any non-JSON 
+                    serializable types that need constructed
+
+    Returns:
+        nodes (list): list of nodes for tree
+    """
     with open(file, "r") as f:
         nodesdict = json.load(f)
     nodes = dict()
@@ -414,6 +425,8 @@ def read_nodes(file):
         if node.parent:
             node.parent = nodes[node.parent]
         node.children = [nodes[child] for child in node.children]
+        if isinstance(node.rule, dict) and "class" in node.rule.keys():
+            node.rule = from_dict(node.rule, class_dict=class_dict)
 
     return nodes
 

From d99a09d1ac87336205d1e99825acc0be7a45a9c9 Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:21:56 -0700
Subject: [PATCH 06/10] change environment file to use rmgmolecule

---
 environment.yml | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/environment.yml b/environment.yml
index e552313..4a8d0bf 100644
--- a/environment.yml
+++ b/environment.yml
@@ -5,15 +5,12 @@ channels:
   - mjohnson541
   - rmg
 dependencies:
-  - rdkit >=2020.03.3.0
-  - cython >=0.25.2
-  - conda-forge::openbabel >= 3
-  - cairo
-  - cairocffi
-  - pyrdl
+  - python >=3.7,<3.10
+  - jupyter
   - nose
-  - rmg::lpsolve55
-  - rmg::quantities
-  - scipy
+  - numpy
+  - pydot
+  - mjohnson541::rmgmolecule >=0.3.0
   - scikit-learn
-  - pydot
\ No newline at end of file
+  - scipy
+  

From 00dc2516d67e51598bbb0ada2681e2bdb5f693cb Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:26:42 -0700
Subject: [PATCH 07/10] add matplotlib to environment

---
 environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment.yml b/environment.yml
index 4a8d0bf..c736894 100644
--- a/environment.yml
+++ b/environment.yml
@@ -7,6 +7,7 @@ channels:
 dependencies:
   - python >=3.7,<3.10
   - jupyter
+  - matplotlib
   - nose
   - numpy
   - pydot

From 55ab89bda3baa4c9b075d1b23b0149bcb0b9a2f7 Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Tue, 19 Mar 2024 18:28:51 -0700
Subject: [PATCH 08/10] add notebook for Surface Diffusion single eval SIDT
 example

---
 .github/workflows/CI.yml                      |   1 +
 ...e_Diffusion_single_eval_SIDT_example.ipynb | 468 ++++++++++++++++++
 2 files changed, 469 insertions(+)
 create mode 100644 IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 24a055e..027e9f1 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -47,3 +47,4 @@ jobs:
       - name: Test PySIDT notebooks
         run: |
           pytest --nbmake IPython/multi_eval_SIDT_example.ipynb
+          pytest --nbmake IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb
diff --git a/IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb b/IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb
new file mode 100644
index 0000000..bce5a6a
--- /dev/null
+++ b/IPython/Surface_Diffusion_single_eval_SIDT_example.ipynb
@@ -0,0 +1,468 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b57e048e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pysidt import Datum, SubgraphIsomorphicDecisionTree, write_nodes, read_nodes\n",
+    "from pysidt.extensions import split_mols\n",
+    "from pysidt.plotting import plot_tree\n",
+    "import json\n",
+    "import logging\n",
+    "from molecule.molecule import Molecule,Group\n",
+    "from molecule.molecule.atomtype import ATOMTYPES\n",
+    "from molecule.kinetics import SurfaceArrhenius\n",
+    "from molecule.kinetics.uncertainties import RateUncertainty\n",
+    "from molecule.quantity import ScalarQuantity\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cf082bc8-733c-4cfc-b1dc-8fb1bcbcb62a",
+   "metadata": {},
+   "source": [
+    "Prepare Training Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bfd59dd3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Surface diffusion mean field rate coefficients on Cu111 computed from Pynta\n",
+    "diff_dict = {\n",
+    "    '1    C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}\\n2    H u0 p0 c0 {1,S}\\n3    H u0 p0 c0 {1,S}\\n4    H u0 p0 c0 {1,S}\\n5 *1 O u0 p2 c0 {1,S} {6,S}\\n6 *2 X u0 p0 c0 {5,S}\\n7 *3 X u0 p0 c0\\n': \n",
+    "    SurfaceArrhenius(A=(2.2223e-07,'m^2/(molecule*s)'), n=0.0174829, Ea=(15.6635,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K')),\n",
+    "    '1    N u0 p1 c0 {2,S} {3,S} {4,S}\\n2    H u0 p0 c0 {1,S}\\n3    H u0 p0 c0 {1,S}\\n4    C u0 p0 c0 {1,S} {5,S} {6,D}\\n5    H u0 p0 c0 {4,S}\\n6 *1 C u0 p0 c0 {4,D} {7,S} {8,S}\\n7    H u0 p0 c0 {6,S}\\n8 *2 X u0 p0 c0 {6,S}\\n9 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(5.13926e-10,'m^2/(molecule*s)'), n=1.01787, Ea=(10.2887,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00322, dn = +|- 0.000407601, dEa = +|- 0.00283132 kJ/mol\"\"\"),\n",
+    "    '1    O u0 p2 c0 {2,D}\\n2 *1 C u0 p0 c0 {1,D} {3,D}\\n3 *2 X u0 p0 c0 {2,D}\\n4 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(2.78393e-08,'m^2/(molecule*s)'), n=0.0503798, Ea=(4.61439,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00513, dn = +|- 0.000648231, dEa = +|- 0.00450281 kJ/mol\"\"\"),\n",
+    "    '1 *1 O u0 p2 c0 {2,D}\\n2 *2 X u0 p0 c0 {1,D}\\n3 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(4.36036e-07,'m^2/(molecule*s)'), n=0.0110633, Ea=(27.0082,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00512, dn = +|- 0.000646724, dEa = +|- 0.00449234 kJ/mol\"\"\"),\n",
+    "    '1 *1 N u0 p1 c0 {2,T}\\n2 *2 X u0 p0 c0 {1,T}\\n3 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(5.22663e-07,'m^2/(molecule*s)'), n=0.0171004, Ea=(17.9096,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00831, dn = +|- 0.00104785, dEa = +|- 0.0072787 kJ/mol\"\"\"),\n",
+    "    '1 *1 O u0 p2 c0 {2,S} {3,S}\\n2    H u0 p0 c0 {1,S}\\n3 *2 X u0 p0 c0 {1,S}\\n4 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(2.6936e-07,'m^2/(molecule*s)'), n=-0.024542, Ea=(13.3305,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00955, dn = +|- 0.00120318, dEa = +|- 0.00835765 kJ/mol\"\"\"),\n",
+    "    '1 *1 N u0 p1 c0 {2,S} {3,D}\\n2    H u0 p0 c0 {1,S}\\n3 *2 X u0 p0 c0 {1,D}\\n4 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(4.69378e-07,'m^2/(molecule*s)'), n=0.00356855, Ea=(22.6721,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.00213, dn = +|- 0.000270019, dEa = +|- 0.00187564 kJ/mol\"\"\"),\n",
+    "    '1    O u0 p2 c0 {2,D}\\n2 *1 C u0 p0 c0 {1,D} {3,S} {4,S}\\n3    H u0 p0 c0 {2,S}\\n4 *2 X u0 p0 c0 {2,S}\\n5 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(1.18904e-07,'m^2/(molecule*s)'), n=0.0763068, Ea=(1.53955,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.01967, dn = +|- 0.00246664, dEa = +|- 0.017134 kJ/mol\"\"\"),\n",
+    "    '1 *1 H u0 p0 c0 {2,S}\\n2 *2 X u0 p0 c0 {1,S}\\n3 *3 X u0 p0 c0\\n':\n",
+    "    SurfaceArrhenius(A=(8.66e-07,'m^2/(molecule*s)'), n=0.019382, Ea=(12.4395,'kJ/mol'), T0=(1,'K'), Tmin=(298,'K'), Tmax=(2500,'K'), comment=\"\"\"Fitted to 50 data points; dA = *|/ 1.02604, dn = +|- 0.00325525, dEa = +|- 0.022612 kJ/mol\"\"\"), \n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10facbba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(diff_dict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c5833c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training_data = []\n",
+    "for adjlist,kin in diff_dict.items():\n",
+    "    d = Datum(Molecule().from_adjacency_list(adjlist), kin)\n",
+    "    training_data.append(d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f92c02e1-2985-4160-8c62-897914e1dd9a",
+   "metadata": {},
+   "source": [
+    "Class for Surface Diffusion Estimation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "94bf9f49",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SurfaceDiffusionEstimator(SubgraphIsomorphicDecisionTree):\n",
+    "    \n",
+    "    def choose_extension(self, node, exts):\n",
+    "        \"\"\"\n",
+    "        select best extension among the set of extensions\n",
+    "        returns a Node object\n",
+    "        almost always subclassed\n",
+    "        \"\"\"\n",
+    "        Tref = 1000.0\n",
+    "        minval = np.inf\n",
+    "        minext = None\n",
+    "        for ext in exts:\n",
+    "            new,comp = split_mols(node.items, ext)\n",
+    "            val = np.std([np.log(x.value.get_rate_coefficient(T=Tref)) for x in new])*len(new) + np.std([np.log(x.value.get_rate_coefficient(T=Tref)) for x in comp])*len(comp)\n",
+    "            if val < minval:\n",
+    "                minval = val \n",
+    "                minext = ext \n",
+    "        \n",
+    "        return minext\n",
+    "    \n",
+    "    def fit_tree(self, data=None):\n",
+    "        \"\"\"\n",
+    "        fit rule for each node\n",
+    "        \"\"\"\n",
+    "        Tref = 1000.0\n",
+    "        fmax = 1.0e5\n",
+    "        if data:\n",
+    "            self.clear_data()\n",
+    "            self.root.items = data[:]\n",
+    "            self.descend_training_from_top(only_specific_match=False)\n",
+    "        \n",
+    "        for node in self.nodes.values():\n",
+    "            if len(node.items) == 0:\n",
+    "                logging.error(node.name)\n",
+    "                raise ValueError\n",
+    "                \n",
+    "            node.rule = average_kinetics(node.items)\n",
+    "            \n",
+    "            data_mean = np.mean(np.log([k.value.get_rate_coefficient(Tref) for k in node.items]))\n",
+    "            n = len(node.items)\n",
+    "            \n",
+    "            if len(node.items) == 1:\n",
+    "                node.rule.uncertainty = RateUncertainty(mu=0.0, var=(np.log(fmax) / 2.0) ** 2, N=1, Tref=Tref, data_mean=data_mean, correlation=node.name)\n",
+    "                node.rule.comment = f\"Only one reaction rate\"\n",
+    "            else:\n",
+    "                dlnks = np.array([\n",
+    "                    np.log(average_kinetics([node.items[k] for k in list(set(range(len(node.items)))-{i})]).get_rate_coefficient(Tref) / \n",
+    "                          node.items[i].value.get_rate_coefficient(Tref)) for i in range(len(node.items))\n",
+    "                ])\n",
+    "                mu = np.mean(dlnks)\n",
+    "                s = np.std(dlnks)\n",
+    "                node.rule.uncertainty = RateUncertainty(mu=mu, var=s ** 2, N=n, Tref=Tref, data_mean=data_mean, correlation=node.name)\n",
+    "        \n",
+    "    def evaluate(self, mol):\n",
+    "        \"\"\"\n",
+    "        Evaluate tree for a given possibly labeled mol\n",
+    "        \"\"\"\n",
+    "        children = self.root.children \n",
+    "        node = self.root\n",
+    "        \n",
+    "        while children != []:\n",
+    "            for child in children:\n",
+    "                if mol.is_subgraph_isomorphic(child.group, generate_initial_map=True, save_order=True):\n",
+    "                    children = child.children \n",
+    "                    node = child\n",
+    "                    break\n",
+    "            else:\n",
+    "                break\n",
+    "\n",
+    "        while node.parent is not None:\n",
+    "            err_parent = abs(node.parent.rule.uncertainty.data_mean + node.parent.rule.uncertainty.mu - node.parent.rule.uncertainty.data_mean) + np.sqrt(2.0*node.parent.rule.uncertainty.var/np.pi)\n",
+    "            err_entry = abs(node.rule.uncertainty.mu) + np.sqrt(2.0*node.rule.uncertainty.var/np.pi)\n",
+    "            if err_entry <= err_parent:\n",
+    "                break\n",
+    "            else:\n",
+    "                node = node.parent\n",
+    "        \n",
+    "        return node.rule\n",
+    "\n",
+    "def average_kinetics(items):\n",
+    "    Aunits = items[0].value.A.units\n",
+    "    \n",
+    "    if Aunits in {'cm^3/(mol*s)', 'cm^3/(molecule*s)', 'm^3/(molecule*s)'}:\n",
+    "        Aunits = 'm^3/(mol*s)'\n",
+    "    elif Aunits in {'cm^6/(mol^2*s)', 'cm^6/(molecule^2*s)', 'm^6/(molecule^2*s)'}:\n",
+    "        Aunits = 'm^6/(mol^2*s)'\n",
+    "    elif Aunits in {'s^-1', 'm^3/(mol*s)', 'm^6/(mol^2*s)'}:\n",
+    "        # they were already in SI\n",
+    "        pass\n",
+    "    elif Aunits in {'m^2/(mol*s)', 'cm^2/(mol*s)', 'm^2/(molecule*s)', 'cm^2/(molecule*s)'}:\n",
+    "        # surface: bimolecular (Langmuir-Hinshelwood)\n",
+    "        Aunits = 'm^2/(mol*s)'\n",
+    "    elif Aunits in {'m^5/(mol^2*s)', 'cm^5/(mol^2*s)', 'm^5/(molecule^2*s)', 'cm^5/(molecule^2*s)'}:\n",
+    "        # surface: dissociative adsorption\n",
+    "        Aunits = 'm^5/(mol^2*s)'\n",
+    "    elif Aunits == '':\n",
+    "        # surface: sticking coefficient\n",
+    "        pass\n",
+    "    else:\n",
+    "        raise ValueError(f'Invalid units {Aunits} for averaging kinetics.')\n",
+    "    \n",
+    "    A = np.exp(sum(np.log(d.value.A.value_si) for d in items)/len(items))\n",
+    "    n = sum(d.value.n.value_si for d in items)/len(items)\n",
+    "    Ea = sum(d.value.Ea.value_si for d in items)/len(items)\n",
+    "    return SurfaceArrhenius(A=(A,Aunits), n=n, Ea=(Ea,\"J/mol\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0e3fc001-98ca-4da0-a659-bd5239404fe1",
+   "metadata": {},
+   "source": [
+    "Generate Tree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b6f7d6f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root = Group().from_adjacency_list('1 *1 R u0 px cx {2,[S,D,T,Q]}\\n2 *2 X u0 p0 c0 {1,[S,D,T,Q]}\\n3 *3 X u0 p0 c0\\n')\n",
+    "sidt = SurfaceDiffusionEstimator(root_group=root,\n",
+    "            r=[ATOMTYPES[x] for x in [\"X\",\"H\",\"C\",\"O\",\"N\"]], r_bonds=[1,2,3,1.5,4,0.0],\n",
+    "                                                         r_un=[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91374aac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sidt.generate_tree(data=training_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82bab66c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sidt.fit_tree(data=training_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a12b7fc-eb9f-4fea-a0fd-4747b0a5b495",
+   "metadata": {},
+   "source": [
+    "Visualize Tree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5923f619",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_tree(sidt, images=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9cfe8492",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_tree(sidt, images=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f370a185-5654-4f34-83e4-821ad783346b",
+   "metadata": {},
+   "source": [
+    "Tree Evaluation Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a245c12e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m = Molecule().from_adjacency_list(\"\"\"\n",
+    "1 H u0 p0 c0 {2,S}\n",
+    "2 O u0 p2 c0 {1,S} {3,S}\n",
+    "3 *1 O u0 p2 c0 {2,S} {4,S}\n",
+    "4 *2 X u0 p0 c0 {3,S}\n",
+    "5 *3 X u0 p0 c0\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3dcfbca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out = sidt.evaluate(m)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c757052",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a4cb9c1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out.uncertainty"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e881b090-d084-408c-9fdf-4bc8b60ce91b",
+   "metadata": {},
+   "source": [
+    "Analyze Accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f9c597b-6e1b-4487-b95f-caab91dee454",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate_one_node_up(sidt, mol):\n",
+    "    \"\"\"\n",
+    "    Evaluate tree except that if the node was defined by a single reaction move one node up\n",
+    "    \"\"\"\n",
+    "    children = sidt.root.children \n",
+    "    node = sidt.root\n",
+    "    \n",
+    "    while children != []:\n",
+    "        for child in children:\n",
+    "            if mol.is_subgraph_isomorphic(child.group, generate_initial_map=True, save_order=True):\n",
+    "                children = child.children \n",
+    "                node = child\n",
+    "                break\n",
+    "        else:\n",
+    "            break\n",
+    "\n",
+    "    while node.parent is not None:\n",
+    "        err_parent = abs(node.parent.rule.uncertainty.data_mean + node.parent.rule.uncertainty.mu - node.parent.rule.uncertainty.data_mean) + np.sqrt(2.0*node.parent.rule.uncertainty.var/np.pi)\n",
+    "        err_entry = abs(node.rule.uncertainty.mu) + np.sqrt(2.0*node.rule.uncertainty.var/np.pi)\n",
+    "        if err_entry <= err_parent:\n",
+    "            break\n",
+    "        else:\n",
+    "            node = node.parent\n",
+    "    \n",
+    "    if node.children:\n",
+    "        return node.rule\n",
+    "    else:\n",
+    "        return node.parent.rule"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "baf1d4fb-217c-44b5-b5c0-b223aea9dcd3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "err = [np.abs(np.log(evaluate_one_node_up(sidt,Molecule().from_adjacency_list(st)).get_rate_coefficient(800.0)/diff.get_rate_coefficient(800.0))) for st,diff in diff_dict.items()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5bf5ade6-2c42-4594-82b3-112b685f29ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.hist(err,bins=5, density=True)\n",
+    "plt.ylabel(\"Density\")\n",
+    "plt.xlabel(\"$\\Delta Log(k)$ in Diffusion Rate Coefficient Prediction at 800 K\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "594ca4c1-74b9-4d2e-9297-628cef5d1bc4",
+   "metadata": {},
+   "source": [
+    "Save and Load Tree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64e7d327-80ab-44e9-833f-a1b2810df86e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file = \"Surface_Diffusion_tree.json\"\n",
+    "write_nodes(sidt, file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4cee908-fe69-46f5-a347-9947344f6a53",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nodes = read_nodes(file, class_dict={\"SurfaceArrhenius\": SurfaceArrhenius, \"RateUncertainty\": RateUncertainty, \"ScalarQuantity\": ScalarQuantity})\n",
+    "sidt_loaded = SurfaceDiffusionEstimator(nodes=nodes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b892557-7f1e-4deb-a558-8bc57f341441",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sidt_loaded.evaluate(m)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89e1d7b0-b277-4365-bd43-8958dc4a7396",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 714bb24b4479ea1cf56b17c81088846fbada4d1b Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Wed, 20 Mar 2024 12:17:09 -0700
Subject: [PATCH 09/10] remove source install of rmgmolecule in CI tests

---
 .github/workflows/CI.yml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 027e9f1..894407f 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -17,11 +17,6 @@ jobs:
       - name: Checkout PySIDT
         uses: actions/checkout@v4
 
-      - name: Clone ReactionMechanismGenerator/molecule
-        run: |
-          cd ..
-          git clone -b ts_len_improvements https://github.com/ReactionMechanismGenerator/molecule.git
-
       - name: Create pysidt environment
         uses: conda-incubator/setup-miniconda@v3
         with:
@@ -37,13 +32,6 @@ jobs:
           python -m pip install pytest nbmake
           python -m pip install -e .
 
-      - name: Make and install molecule
-        run: |
-          cd ../molecule
-          make clean
-          make
-          pip install -e .
-
       - name: Test PySIDT notebooks
         run: |
           pytest --nbmake IPython/multi_eval_SIDT_example.ipynb

From 40dad62b5b70ca957e4901d4deb1ed3ff93b7c31 Mon Sep 17 00:00:00 2001
From: Hao-Wei Pang <hwpang@mit.edu>
Date: Thu, 21 Mar 2024 09:29:15 -0400
Subject: [PATCH 10/10] Formatting

---
 pysidt/sidt.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/pysidt/sidt.py b/pysidt/sidt.py
index 72b168e..18c5ba2 100644
--- a/pysidt/sidt.py
+++ b/pysidt/sidt.py
@@ -310,17 +310,17 @@ def evaluate(self, mol):
 
         return node.rule
 
+
 def to_dict(obj):
     out_dict = dict()
     out_dict["class"] = obj.__class__.__name__
-    attrs = [attr for attr in dir(obj) if not attr.startswith('_')]
+    attrs = [attr for attr in dir(obj) if not attr.startswith("_")]
     for attr in attrs:
-        
         val = getattr(obj, attr)
-        
-        if callable(val) or val == getattr(obj.__class__,attr):
+
+        if callable(val) or val == getattr(obj.__class__, attr):
             continue
-        
+
         try:
             json.dumps(val)
             out_dict[attr] = val
@@ -335,7 +335,7 @@ def to_dict(obj):
                 }
             else:
                 out_dict[attr] = to_dict(val)
-    
+
     return out_dict
 
 
@@ -357,13 +357,14 @@ def from_dict(d, class_dict=None):
     for k, v in d.items():
         if k == "class":
             continue
-        if isinstance(v,dict) and "class" in v.keys():
+        if isinstance(v, dict) and "class" in v.keys():
             construct_d[k] = from_dict(v, class_dict=class_dict)
         else:
             construct_d[k] = v
-            
+
     return class_dict[d["class"]](**construct_d)
 
+
 def write_nodes(tree, file):
     nodesdict = dict()
     for node in tree.nodes.values():
@@ -371,12 +372,14 @@ def write_nodes(tree, file):
             p = None
         else:
             p = node.parent.name
-            
+
         try:
             json.dumps(node.rule)
             rule = node.rule
         except (TypeError, OverflowError):
-            rule = to_dict(node.rule) #will work on all rmgmolecule objects, new objects need this method implemented
+            rule = to_dict(
+                node.rule
+            )  # will work on all rmgmolecule objects, new objects need this method implemented
             try:
                 json.dumps(rule)
             except:
@@ -390,7 +393,7 @@ def write_nodes(tree, file):
             "parent": p,
             "children": [x.name for x in node.children],
             "name": node.name,
-            "depth": node.depth
+            "depth": node.depth,
         }
 
     with open(file, "w") as f:
@@ -402,7 +405,7 @@ def read_nodes(file, class_dict=dict()):
 
     Args:
         file (string): string of JSON file to laod
-        class_dict (dict): maps class names to classes for any non-JSON 
+        class_dict (dict): maps class names to classes for any non-JSON
                     serializable types that need constructed
 
     Returns: