ReactionMechanismGenerator · sevyharris · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 22, 2025
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -261,7 +261,6 @@ jobs:
           conda activate rmg_env_without_rms
 
           exec 2> >(tee -a regression.stderr >&2) 1> >(tee -a regression.stdout)
-          touch checkModels.log
           mkdir -p "test/regression-diff"
           for regr_test in aromatics liquid_oxidation nitrogen oxidation sulfur superminimal RMS_constantVIdealGasReactor_superminimal RMS_CSTR_liquid_oxidation fragment RMS_constantVIdealGasReactor_fragment minimal_surface;
           do
@@ -284,7 +283,7 @@ jobs:
                 $REFERENCE/"$regr_test"/chemkin/chem_annotated.inp \
                 $REFERENCE/"$regr_test"/chemkin/species_dictionary.txt \
                 test/regression/"$regr_test"/chemkin/chem_annotated.inp \
-                test/regression/"$regr_test"/chemkin/species_dictionary.txt &> checkModels.log
+                test/regression/"$regr_test"/chemkin/species_dictionary.txt &> checkModels.err
             then
               echo "<summary>$regr_test Passed Core Comparison ✅</summary>"
             else
@@ -295,13 +294,19 @@ jobs:
             echo "" # blank line so next block is interpreted as markdown
             cat "$regr_test-core.log" || (echo "Dumping the whole log failed, please download it from GitHub actions. Here are the first 100 lines:" && head -n100 "$regr_test-core.log")
             echo "</details>"
+            if [ -s checkModels.err ]; then
+              echo "<details>"
+              echo "<summary>Errors occurred during core comparison ⚠️</summary>"
+              cat checkModels.err
+              echo "</details>"
+            fi
             echo "<details>"
             if python scripts/checkModels.py \
                 "$regr_test-edge" \
                 $REFERENCE/"$regr_test"/chemkin/chem_edge_annotated.inp \
                 $REFERENCE/"$regr_test"/chemkin/species_edge_dictionary.txt \
                 test/regression/"$regr_test"/chemkin/chem_edge_annotated.inp \
-                test/regression/"$regr_test"/chemkin/species_edge_dictionary.txt &> checkModels.log
+                test/regression/"$regr_test"/chemkin/species_edge_dictionary.txt &> checkModels.err
             then
               echo "<summary>$regr_test Passed Edge Comparison ✅</summary>"
             else
@@ -310,8 +315,14 @@ jobs:
               export FAILED=Yes
             fi
             echo "" # blank line so next block is interpreted as markdown
-            cat "$regr_test-edge.log" || (echo "Dumping the whole log failed, please download it from GitHub actions. Here are the first 100 lines:" && head -n100 "$regr_test-core.log")
+            cat "$regr_test-edge.log" || (echo "Dumping the whole log failed, please download it from GitHub actions. Here are the first 100 lines:" && head -n100 "$regr_test-edge.log")
             echo "</details>"
+            if [ -s checkModels.err ]; then
+              echo "<details>"
+              echo "<summary>Errors occurred during edge comparison ⚠️</summary>"
+              cat checkModels.err
+              echo "</details>"
+            fi
 
             # Check for Regression between Reference and Dynamic (skip superminimal)
             if [ -f test/regression/"$regr_test"/regression_input.py ];
@@ -320,14 +331,20 @@ jobs:
               if python rmgpy/tools/regression.py \
                 test/regression/"$regr_test"/regression_input.py \
                 $REFERENCE/"$regr_test"/chemkin \
-                test/regression/"$regr_test"/chemkin &> checkModels.log
+                test/regression/"$regr_test"/chemkin 2> regression.py.err
               then
                 echo "<summary>$regr_test Passed Observable Testing ✅</summary>"
               else
                 echo "<summary>$regr_test Failed Observable Testing ❌</summary>"
                 export FAILED=Yes
               fi
               echo "</details>"
+              if [ -s regression.py.err ]; then
+                echo "<details>"
+                echo "<summary>Errors occurred during observable testing ⚠️</summary>"
+                cat regression.py.err
+                echo "</details>"
+              fi
             fi
             echo ""
           done
@@ -349,18 +366,13 @@ jobs:
           cat regression.stdout >> summary.txt
           echo "</details>" >> summary.txt
           echo "" >> summary.txt
-          echo "<details>" >> summary.txt
-          echo "<summary>Debugging info for `checkModels.py` (if any).</summary>" >> summary.txt
-          cat checkModels.log >> summary.txt
-          echo "</details>" >> summary.txt
-          echo "" >> summary.txt
           echo "_beep boop this comment was written by a bot_ :robot:" >> summary.txt
           cat summary.txt > $GITHUB_STEP_SUMMARY
 
       - name: Upload regression summary artifact
-       # the annotate workflow uses this artifact to add a comment to the PR
+        # the annotate workflow uses this artifact to add a comment to the PR
         uses: actions/upload-artifact@v4
-        if : ${{ github.event_name == 'pull_request' }}
+        if: ${{ github.event_name == 'pull_request' }}
         with:
           name: regression_summary
           path: summary.txt

diff --git a/rmgpy/data/kinetics/database.py b/rmgpy/data/kinetics/database.py
@@ -752,42 +752,52 @@ def reconstruct_kinetics_from_source(self, reaction, source, fix_barrier_height=
                 else:
                     kinetics = training_entry.data
             elif 'Rate Rules' in source:
-
                 source_dict = source['Rate Rules'][1]
                 rules = source_dict['rules']
                 training = source_dict['training']
                 degeneracy = source_dict['degeneracy']
 
-                log_a = 0
-                n = 0
-                alpha = 0
-                E0 = 0
-                for rule_entry, weight in rules:
-                    log_a += np.log10(rule_entry.data.A.value_si) * weight
-                    n += rule_entry.data.n.value_si * weight
-                    alpha += rule_entry.data.alpha.value_si * weight
-                    E0 += rule_entry.data.E0.value_si * weight
-                for rule_entry, training_entry, weight in training:
-                    log_a += np.log10(rule_entry.data.A.value_si) * weight
-                    n += rule_entry.data.n.value_si * weight
-                    alpha += rule_entry.data.alpha.value_si * weight
-                    E0 += rule_entry.data.E0.value_si * weight
-
-                a_units = rule_entry.data.A.units
-                if a_units == 'cm^3/(mol*s)' or a_units == 'cm^3/(molecule*s)' or a_units == 'm^3/(molecule*s)':
-                    a_units = 'm^3/(mol*s)'
-                elif a_units == 'cm^6/(mol^2*s)' or a_units == 'cm^6/(molecule^2*s)' or a_units == 'm^6/(molecule^2*s)':
-                    a_units = 'm^6/(mol^2*s)'
-                elif a_units == 's^-1' or a_units == 'm^3/(mol*s)' or a_units == 'm^6/(mol^2*s)':
-                    pass
-                else:
-                    raise ValueError('Invalid units {0} for averaging kinetics.'.format(a_units))
-                kinetics = ArrheniusEP(
-                    A=(degeneracy * 10 ** log_a, a_units),
-                    n=n,
-                    alpha=alpha,
-                    E0=(E0 * 0.001, "kJ/mol"),
-                )
+                if rules and isinstance(rules[0][0].data, ArrheniusBM):
+                    # This is a rate rule with ArrheniusBM kinetics
+                    assert len(rules) == 1, "There should only be one rate rule for ArrheniusBM kinetics in the autogenerated trees"
+                    kinetics = ArrheniusBM(  # have to create a new object to avoid modifying the original when we multiply by degeneracy
+                        A=rules[0][0].data.A,
+                        n=rules[0][0].data.n,
+                        w0=rules[0][0].data.w0,
+                        E0=rules[0][0].data.E0,
+                    )
+                    kinetics.A.value_si *= degeneracy
+                else:  # ArrheniusEP kinetics
+                    log_a = 0
+                    n = 0
+                    alpha = 0
+                    E0 = 0
+                    for rule_entry, weight in rules:
+                        log_a += np.log10(rule_entry.data.A.value_si) * weight
+                        n += rule_entry.data.n.value_si * weight
+                        alpha += rule_entry.data.alpha.value_si * weight
+                        E0 += rule_entry.data.E0.value_si * weight
+                    for rule_entry, training_entry, weight in training:
+                        log_a += np.log10(rule_entry.data.A.value_si) * weight
+                        n += rule_entry.data.n.value_si * weight
+                        alpha += rule_entry.data.alpha.value_si * weight
+                        E0 += rule_entry.data.E0.value_si * weight
+                    a_units = rule_entry.data.A.units
+                    if a_units == 'cm^3/(mol*s)' or a_units == 'cm^3/(molecule*s)' or a_units == 'm^3/(molecule*s)':
+                        a_units = 'm^3/(mol*s)'
+                    elif a_units == 'cm^6/(mol^2*s)' or a_units == 'cm^6/(molecule^2*s)' or a_units == 'm^6/(molecule^2*s)':
+                        a_units = 'm^6/(mol^2*s)'
+                    elif a_units == 's^-1' or a_units == 'm^3/(mol*s)' or a_units == 'm^6/(mol^2*s)':
+                        pass
+                    else:
+                        raise ValueError('Invalid units {0} for averaging kinetics.'.format(a_units))
+
+                    kinetics = ArrheniusEP(
+                        A=(degeneracy * 10 ** log_a, a_units),
+                        n=n,
+                        alpha=alpha,
+                        E0=(E0 * 0.001, "kJ/mol"),
+                    )
             else:
                 raise ValueError("Source data must be either 'Library', 'PDep','Training', or 'Rate Rules'.")
 

diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py
@@ -4442,24 +4442,29 @@ def extract_source_from_comments(self, reaction):
         """
         lines = reaction.kinetics.comment.split('\n')
 
-        exact = False
+        exact_rule = False
         template = None
         rules = None
         training_entries = None
         degeneracy = 1
 
-        regex = r"\[(.*)\]"  # only hit outermost brackets
+        training_reaction_pattern = r'Matched reaction\s*(\d+).*in.*training'
+        degeneracy_pattern = r'Multiplied by reaction path degeneracy\s*(\d+)'
+
         for line in lines:
-            if line.startswith('Matched'):
+            training_matches = re.search(training_reaction_pattern, line)
+            degeneracy_matches = re.search(degeneracy_pattern, line)
+
+            if training_matches is not None:
                 # Source of the kinetics is from training reaction
-                training_reaction_index = int(line.split()[2])
+                training_reaction_index = int(training_matches.group(1))
                 depository = self.get_training_depository()
                 training_entry = depository.entries[training_reaction_index]
                 # Perform sanity check that the training reaction's label matches that of the comments
                 if training_entry.label not in line:
-                    raise AssertionError('Reaction {0} uses kinetics from training reaction {1} '
-                                         'but does not match the training reaction {1} from the '
-                                         '{2} family.'.format(reaction, training_reaction_index, self.label))
+                    raise AssertionError(f'Reaction {reaction} uses kinetics from training reaction {training_reaction_index} '
+                                         f'but does not match the training reaction {training_reaction_index} from the '
+                                         f'{self.label} family.')
 
                 # Sometimes the matched kinetics could be in the reverse direction.....
                 if reaction.is_isomorphic(training_entry.item, either_direction=False, save_order=self.save_order):
@@ -4468,34 +4473,34 @@ def extract_source_from_comments(self, reaction):
                     reverse = True
                 return True, [self.label, training_entry, reverse]
 
-            elif line.startswith('Exact match'):
-                exact = True
-            elif line.startswith('Estimated'):
-                pass
-            elif line.startswith('Multiplied by'):
-                degeneracy = float(line.split()[-1])
+            if 'Exact match found for rate rule' in line:
+                exact_rule = True
+            if degeneracy_matches is not None:
+                degeneracy = float(degeneracy_matches.group(1))
 
         # Extract the rate rule information
         full_comment_string = reaction.kinetics.comment.replace('\n', ' ')
-
+        autogen_node_search_pattern = r'Estimated from node (.*)'
         # The rate rule string is right after the phrase 'for rate rule'
-        rate_rule_string = full_comment_string.split("for rate rule", 1)[1].strip()
-
-        if rate_rule_string[0] == '[':
-            # Get the contents of the capture group in the regex
-            # Remove any spaces which may be left over as a result of a line break
-            template_label = re.split(regex, rate_rule_string)[1].replace(' ', '')
+        template_pattern = r"for rate rule \[(.*)\]"  # only hit outermost brackets
+        autogen_node_matches = re.search(autogen_node_search_pattern, full_comment_string)
+        template_matches = re.search(template_pattern, full_comment_string)
+        if autogen_node_matches is not None:  # autogenerated trees
+            template_str = autogen_node_matches.group(1).split('Multiplied by reaction path degeneracy')[0].strip()
+            tokens = template_str.split()
+            if len(tokens) == 2:  # The node was probably split because wordwrap was turned off
+                assert len(template_str) > 115, 'The node name is too short to have been broken up by the chemkin writer'
+                template_str = ''.join(tokens)
+            elif len(tokens) > 2:  # the node name has several spaces and cannot be reassembled, so raise instead of guessing
+                raise ValueError(f'The node name {template_str} has multiple spaces and cannot be parsed for reaction {reaction}.')
+            template = self.retrieve_template([template_str])
+        elif template_matches is not None:  # hand-built trees
+            template_label = template_matches.group(1)
+            template = self.retrieve_template(template_label.split(';'))
         else:
-            # If this has the line 'From training reaction # for rate rule node1;node2'
-            template_label = rate_rule_string.split()[0]
-
-        template = self.retrieve_template(template_label.split(';'))
+            raise ValueError(f'Could not find rate rule in comments for reaction {reaction}.')
         rules, training_entries = self.get_sources_for_template(template)
-
-        if not template:
-            raise ValueError('Could not extract kinetics source from comments for reaction {}.'.format(reaction))
-
-        source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact,
+        source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact_rule,
                        'rules': rules, 'training': training_entries}
 
         # Source of the kinetics is from rate rules