Skip to content

Update comment parser for autogen trees #2787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ jobs:
conda activate rmg_env_without_rms

exec 2> >(tee -a regression.stderr >&2) 1> >(tee -a regression.stdout)
touch checkModels.log
mkdir -p "test/regression-diff"
for regr_test in aromatics liquid_oxidation nitrogen oxidation sulfur superminimal RMS_constantVIdealGasReactor_superminimal RMS_CSTR_liquid_oxidation fragment RMS_constantVIdealGasReactor_fragment minimal_surface;
do
Expand All @@ -284,7 +283,7 @@ jobs:
$REFERENCE/"$regr_test"/chemkin/chem_annotated.inp \
$REFERENCE/"$regr_test"/chemkin/species_dictionary.txt \
test/regression/"$regr_test"/chemkin/chem_annotated.inp \
test/regression/"$regr_test"/chemkin/species_dictionary.txt &> checkModels.log
test/regression/"$regr_test"/chemkin/species_dictionary.txt &> checkModels.err
then
echo "<summary>$regr_test Passed Core Comparison ✅</summary>"
else
Expand All @@ -295,13 +294,19 @@ jobs:
echo "" # blank line so next block is interpreted as markdown
cat "$regr_test-core.log" || (echo "Dumping the whole log failed, please download it from GitHub actions. Here are the first 100 lines:" && head -n100 "$regr_test-core.log")
echo "</details>"
if [ -s checkModels.err ]; then
echo "<details>"
echo "<summary>Errors occurred during core comparison ⚠️</summary>"
cat checkModels.err
echo "</details>"
fi
echo "<details>"
if python scripts/checkModels.py \
"$regr_test-edge" \
$REFERENCE/"$regr_test"/chemkin/chem_edge_annotated.inp \
$REFERENCE/"$regr_test"/chemkin/species_edge_dictionary.txt \
test/regression/"$regr_test"/chemkin/chem_edge_annotated.inp \
test/regression/"$regr_test"/chemkin/species_edge_dictionary.txt &> checkModels.log
test/regression/"$regr_test"/chemkin/species_edge_dictionary.txt &> checkModels.err
then
echo "<summary>$regr_test Passed Edge Comparison ✅</summary>"
else
Expand All @@ -310,8 +315,14 @@ jobs:
export FAILED=Yes
fi
echo "" # blank line so next block is interpreted as markdown
cat "$regr_test-edge.log" || (echo "Dumping the whole log failed, please download it from GitHub actions. Here are the first 100 lines:" && head -n100 "$regr_test-core.log")
cat "$regr_test-edge.log" || (echo "Dumping the whole log failed, please download it from GitHub actions. Here are the first 100 lines:" && head -n100 "$regr_test-edge.log")
echo "</details>"
if [ -s checkModels.err ]; then
echo "<details>"
echo "<summary>Errors occurred during edge comparison ⚠️</summary>"
cat checkModels.err
echo "</details>"
fi

# Check for Regression between Reference and Dynamic (skip superminimal)
if [ -f test/regression/"$regr_test"/regression_input.py ];
Expand All @@ -320,14 +331,20 @@ jobs:
if python rmgpy/tools/regression.py \
test/regression/"$regr_test"/regression_input.py \
$REFERENCE/"$regr_test"/chemkin \
test/regression/"$regr_test"/chemkin &> checkModels.log
test/regression/"$regr_test"/chemkin 2> regression.py.err
then
echo "<summary>$regr_test Passed Observable Testing ✅</summary>"
else
echo "<summary>$regr_test Failed Observable Testing ❌</summary>"
export FAILED=Yes
fi
echo "</details>"
if [ -s regression.py.err ]; then
echo "<details>"
echo "<summary>Errors occurred during observable testing ⚠️</summary>"
cat regression.py.err
echo "</details>"
fi
fi
echo ""
done
Expand All @@ -349,18 +366,13 @@ jobs:
cat regression.stdout >> summary.txt
echo "</details>" >> summary.txt
echo "" >> summary.txt
echo "<details>" >> summary.txt
echo "<summary>Debugging info for `checkModels.py` (if any).</summary>" >> summary.txt
cat checkModels.log >> summary.txt
echo "</details>" >> summary.txt
echo "" >> summary.txt
echo "_beep boop this comment was written by a bot_ :robot:" >> summary.txt
cat summary.txt > $GITHUB_STEP_SUMMARY

- name: Upload regression summary artifact
# the annotate workflow uses this artifact to add a comment to the PR
# the annotate workflow uses this artifact to add a comment to the PR
uses: actions/upload-artifact@v4
if : ${{ github.event_name == 'pull_request' }}
if: ${{ github.event_name == 'pull_request' }}
with:
name: regression_summary
path: summary.txt
Expand Down
72 changes: 41 additions & 31 deletions rmgpy/data/kinetics/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,42 +752,52 @@ def reconstruct_kinetics_from_source(self, reaction, source, fix_barrier_height=
else:
kinetics = training_entry.data
elif 'Rate Rules' in source:

source_dict = source['Rate Rules'][1]
rules = source_dict['rules']
training = source_dict['training']
degeneracy = source_dict['degeneracy']

log_a = 0
n = 0
alpha = 0
E0 = 0
for rule_entry, weight in rules:
log_a += np.log10(rule_entry.data.A.value_si) * weight
n += rule_entry.data.n.value_si * weight
alpha += rule_entry.data.alpha.value_si * weight
E0 += rule_entry.data.E0.value_si * weight
for rule_entry, training_entry, weight in training:
log_a += np.log10(rule_entry.data.A.value_si) * weight
n += rule_entry.data.n.value_si * weight
alpha += rule_entry.data.alpha.value_si * weight
E0 += rule_entry.data.E0.value_si * weight

a_units = rule_entry.data.A.units
if a_units == 'cm^3/(mol*s)' or a_units == 'cm^3/(molecule*s)' or a_units == 'm^3/(molecule*s)':
a_units = 'm^3/(mol*s)'
elif a_units == 'cm^6/(mol^2*s)' or a_units == 'cm^6/(molecule^2*s)' or a_units == 'm^6/(molecule^2*s)':
a_units = 'm^6/(mol^2*s)'
elif a_units == 's^-1' or a_units == 'm^3/(mol*s)' or a_units == 'm^6/(mol^2*s)':
pass
else:
raise ValueError('Invalid units {0} for averaging kinetics.'.format(a_units))
kinetics = ArrheniusEP(
A=(degeneracy * 10 ** log_a, a_units),
n=n,
alpha=alpha,
E0=(E0 * 0.001, "kJ/mol"),
)
if rules and isinstance(rules[0][0].data, ArrheniusBM):
# This is a rate rule with ArrheniusBM kinetics
assert len(rules) == 1, "There should only be one rate rule for ArrheniusBM kinetics in the autogenerated trees"
kinetics = ArrheniusBM( # have to create a new object to avoid modifying the original when we multiply by degeneracy
A=rules[0][0].data.A,
n=rules[0][0].data.n,
w0=rules[0][0].data.w0,
E0=rules[0][0].data.E0,
)
kinetics.A.value_si *= degeneracy
else: # ArrheniusEP kinetics
log_a = 0
n = 0
alpha = 0
E0 = 0
for rule_entry, weight in rules:
log_a += np.log10(rule_entry.data.A.value_si) * weight
n += rule_entry.data.n.value_si * weight
alpha += rule_entry.data.alpha.value_si * weight
E0 += rule_entry.data.E0.value_si * weight
for rule_entry, training_entry, weight in training:
log_a += np.log10(rule_entry.data.A.value_si) * weight
n += rule_entry.data.n.value_si * weight
alpha += rule_entry.data.alpha.value_si * weight
E0 += rule_entry.data.E0.value_si * weight
a_units = rule_entry.data.A.units
if a_units == 'cm^3/(mol*s)' or a_units == 'cm^3/(molecule*s)' or a_units == 'm^3/(molecule*s)':
a_units = 'm^3/(mol*s)'
elif a_units == 'cm^6/(mol^2*s)' or a_units == 'cm^6/(molecule^2*s)' or a_units == 'm^6/(molecule^2*s)':
a_units = 'm^6/(mol^2*s)'
elif a_units == 's^-1' or a_units == 'm^3/(mol*s)' or a_units == 'm^6/(mol^2*s)':
pass
else:
raise ValueError('Invalid units {0} for averaging kinetics.'.format(a_units))

kinetics = ArrheniusEP(
A=(degeneracy * 10 ** log_a, a_units),
n=n,
alpha=alpha,
E0=(E0 * 0.001, "kJ/mol"),
)
else:
raise ValueError("Source data must be either 'Library', 'PDep','Training', or 'Rate Rules'.")

Expand Down
63 changes: 34 additions & 29 deletions rmgpy/data/kinetics/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -4442,24 +4442,29 @@ def extract_source_from_comments(self, reaction):
"""
lines = reaction.kinetics.comment.split('\n')

exact = False
exact_rule = False
template = None
rules = None
training_entries = None
degeneracy = 1

regex = r"\[(.*)\]" # only hit outermost brackets
training_reaction_pattern = r'Matched reaction\s*(\d+).*in.*training'
degeneracy_pattern = r'Multiplied by reaction path degeneracy\s*(\d+)'

for line in lines:
if line.startswith('Matched'):
training_matches = re.search(training_reaction_pattern, line)
degeneracy_matches = re.search(degeneracy_pattern, line)

if training_matches is not None:
# Source of the kinetics is from training reaction
training_reaction_index = int(line.split()[2])
training_reaction_index = int(training_matches.group(1))
depository = self.get_training_depository()
training_entry = depository.entries[training_reaction_index]
# Perform sanity check that the training reaction's label matches that of the comments
if training_entry.label not in line:
raise AssertionError('Reaction {0} uses kinetics from training reaction {1} '
'but does not match the training reaction {1} from the '
'{2} family.'.format(reaction, training_reaction_index, self.label))
raise AssertionError(f'Reaction {reaction} uses kinetics from training reaction {training_reaction_index} '
f'but does not match the training reaction {training_reaction_index} from the '
f'{self.label} family.')

# Sometimes the matched kinetics could be in the reverse direction.....
if reaction.is_isomorphic(training_entry.item, either_direction=False, save_order=self.save_order):
Expand All @@ -4468,34 +4473,34 @@ def extract_source_from_comments(self, reaction):
reverse = True
return True, [self.label, training_entry, reverse]

elif line.startswith('Exact match'):
exact = True
elif line.startswith('Estimated'):
pass
elif line.startswith('Multiplied by'):
degeneracy = float(line.split()[-1])
if 'Exact match found for rate rule' in line:
exact_rule = True
if degeneracy_matches is not None:
degeneracy = float(degeneracy_matches.group(1))

# Extract the rate rule information
full_comment_string = reaction.kinetics.comment.replace('\n', ' ')

autogen_node_search_pattern = r'Estimated from node (.*)'
# The rate rule string is right after the phrase 'for rate rule'
rate_rule_string = full_comment_string.split("for rate rule", 1)[1].strip()

if rate_rule_string[0] == '[':
# Get the contents of the capture group in the regex
# Remove any spaces which may be left over as a result of a line break
template_label = re.split(regex, rate_rule_string)[1].replace(' ', '')
template_pattern = r"for rate rule \[(.*)\]" # only hit outermost brackets
autogen_node_matches = re.search(autogen_node_search_pattern, full_comment_string)
template_matches = re.search(template_pattern, full_comment_string)
if autogen_node_matches is not None: # autogenerated trees
template_str = autogen_node_matches.group(1).split('Multiplied by reaction path degeneracy')[0].strip()
tokens = template_str.split()
if len(tokens) == 2: # The node was probably split because wordwrap was turned off
assert len(template_str) > 115, 'The node name is too short to have been broken up by the chemkin writer'
template_str = ''.join(tokens)
elif len(tokens) > 2: # warn the user the node is probably wrong
raise ValueError(f'The node name {template_str} has multiple spaces and cannot be parsed for reaction {reaction}.')
template = self.retrieve_template([template_str])
elif template_matches is not None: # hand-built trees
template_label = template_matches.group(1)
template = self.retrieve_template(template_label.split(';'))
else:
# If this has the line 'From training reaction # for rate rule node1;node2'
template_label = rate_rule_string.split()[0]

template = self.retrieve_template(template_label.split(';'))
raise ValueError(f'Could not find rate rule in comments for reaction {reaction}.')
rules, training_entries = self.get_sources_for_template(template)

if not template:
raise ValueError('Could not extract kinetics source from comments for reaction {}.'.format(reaction))

source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact,
source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact_rule,
'rules': rules, 'training': training_entries}

# Source of the kinetics is from rate rules
Expand Down
Loading
Loading