Skip to content

Commit

Permalink
tweak tests in 0.8.x
Browse files Browse the repository at this point in the history
  • Loading branch information
bjodah committed Apr 22, 2024
1 parent 4ed766e commit d6782c8
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 75 deletions.
13 changes: 0 additions & 13 deletions chempy/tests/test_chemistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,16 +570,3 @@ def test_balance_stoichiometry__duplicates():
underdetermined=None,
)
assert bal_Mn_COx == ({"CO": 2}, {"C": 1, "CO2": 1})


def test_composition_dot_as_crystal_water_chempy08x():
"""In Chempy v0.8.x a dot will signify crystal water. But an asterisk '*'
or and interpunct (·) is also accepted (and preferred).
From Chempy v0.9.x on-wards, only interpunct and asterisk will be
interpreted as crystal water delimiters, and a dot will be interpreted
as floating point delimiter in fractional stoichiometric coefficients."""
assert Substance.from_formula('Zn(NO3)2·6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12}
assert Substance.from_formula('Zn(NO3)2*6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12}
# https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning
with pytest.deprecated_call():
assert Substance.from_formula('Zn(NO3)2.6H2O').composition == {30: 1, 7: 2, 8: 12, 1: 12}
37 changes: 26 additions & 11 deletions chempy/util/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _get_formula_parser():
| '{' formula '}'
| '[' formula ']' ) count prime charge?
formula :: term+
hydrate :: ( '.' | '·' | '*' ) count? formula
hydrate :: ( '.' | '\u00B7' | '*' ) count? formula
state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')'
compound :: count formula hydrate? state?
Expand All @@ -115,7 +115,7 @@ def _get_formula_parser():
| '{' formula '}'
| '[' formula ']' ) count prime charge?
formula :: term+
hydrate :: ( '..' | '·' | '*' ) count? formula
hydrate :: ( '..' | '\u00B7' | '*' ) count? formula
state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')'
compound :: count formula hydrate? state?
"""
Expand Down Expand Up @@ -331,16 +331,16 @@ def _parse_stoich(stoich):
_latex_mapping["epsilon-"] = "\\varepsilon-"
_latex_mapping["omicron-"] = "o-"
_latex_mapping["."] = "^\\bullet "
_latex_infix_mapping = {"·": "\\cdot "}
_latex_infix_mapping = {"..": "\\cdot "}

_unicode_mapping = {k + "-": v + "-" for k, v in zip(_greek_letters, _greek_u)}
_unicode_mapping["."] = "⋅"
_unicode_infix_mapping = {"·": "·"}
_unicode_infix_mapping = {"..": "\u00b7"}

_html_mapping = {k + "-": "&" + k + ";-" for k in _greek_letters}
_html_mapping["."] = "⋅"
# _html_infix_mapping = _html_mapping
_html_infix_mapping = {"·": "⋅"}
_html_infix_mapping = {"..": "⋅"}


def _get_leading_integer(s):
Expand Down Expand Up @@ -378,7 +378,7 @@ def formula_to_composition(
True
>>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1}
True
>>> formula_to_composition('Na2CO3·7H2O'.format(s)) == {11: 2, 6: 1, 8: 10, 1: 14}
>>> formula_to_composition('Na2CO3*7H2O'.format(s)) == {11: 2, 6: 1, 8: 10, 1: 14}
True
"""
Expand All @@ -387,7 +387,19 @@ def formula_to_composition(

stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2]
tot_comp = {}
parts = stoich_tok.split("·")
if ".." in stoich_tok:
parts = stoich_tok.split("..")
elif "\u00b7" in stoich_tok:
parts = stoich_tok.split('\u00b7')
elif '.' in stoich_tok:
warnings.warn(
("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes."
" Dot will be interpreted as floating point in chempy-0.9+"),
ChemPyDeprecationWarning
)
parts = stoich_tok.split('.')
else:
parts = list(filter(len, internal_asterisk.split(stoich_tok)))

for idx, stoich in enumerate(parts):
if idx == 0:
Expand Down Expand Up @@ -524,6 +536,8 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs):
)


internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)")

def _formula_to_format(
sub,
sup,
Expand All @@ -533,23 +547,24 @@ def _formula_to_format(
suffixes=("(s)", "(l)", "(g)", "(aq)"),
):
parts = _formula_to_parts(formula, prefixes.keys(), suffixes)
parts0 = parts[0].replace("..", '·').replace('*', '·')
parts0 = parts[0].replace("..", "\u00B7")
parts0 = internal_asterisk.sub("\u00B7", parts0)
if '.' in parts0:
warnings.warn(
("dot is ambiguous in chempy-0.8.x, prefer '*' or '' for complexes."
" Dot will be interpreted as floating point in chempy-0.9+"),
ChemPyDeprecationWarning
)
parts0 = parts0.replace('.', '·')
stoichs = parts0.split("·")
parts0 = parts0.replace('.', "\u00B7")
stoichs = parts0.split("\u00B7")

string = ""
for idx, stoich in enumerate(stoichs):
if idx == 0:
m = 1
else:
m, stoich = _get_leading_integer(stoich)
string += _subs("·", infixes)
string += _subs("..", infixes)
if m != 1:
string += str(m)
string += re.sub(r"([0-9]+\.[0-9]+|[0-9]+)", lambda m: sub(m.group(1)), stoich)
Expand Down
118 changes: 67 additions & 51 deletions chempy/util/tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,45 +303,46 @@ def test_formula_to_composition_bad_complexes(species):
formula_to_composition(species)


@pytest.mark.parametrize(
"species, composition",
[
(
"Ca2.832Fe0.6285Mg5.395(CO3)6",
{
6: 6,
8: 18,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
{
6: 6,
8: 18,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
{
1: 16,
6: 6,
8: 26,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
],
)
@requires(parsing_library)
def test_formula_to_composition_fractional_subscripts(species, composition):
assert formula_to_composition(species) == composition
# This test is enabled in chempy-0.9+
# @pytest.mark.parametrize(
# "species, composition",
# [
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6",
# {
# 6: 6,
# 8: 18,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
# {
# 6: 6,
# 8: 18,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
# {
# 1: 16,
# 6: 6,
# 8: 26,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# ],
# )
# @requires(parsing_library)
# def test_formula_to_composition_fractional_subscripts(species, composition):
# assert formula_to_composition(species) == composition


@pytest.mark.parametrize(
Expand Down Expand Up @@ -535,18 +536,19 @@ def test_to_reaction():
),
("[Fe(CN)6]-3", r"[Fe(CN)_{6}]^{3-}"),
("[Fe(CN)6]-3(aq)", r"[Fe(CN)_{6}]^{3-}(aq)"),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}",
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)",
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)",
),
# This test is enabled in chempy-0.9+:
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}",
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)",
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)",
# ),
],
)
@requires(parsing_library)
Expand Down Expand Up @@ -712,3 +714,17 @@ def test_formula_to_html(species, html):
def test_formula_to_html_caged(species, html):
"""Should produce HTML for cage species."""
assert formula_to_html(species) == html


def test_composition_dot_as_crystal_water_chempy08x():
"""In Chempy v0.8.x a dot will signify crystal water. But an asterisk '*'
or and interpunct (·) is also accepted (and preferred).
From Chempy v0.9.x on-wards, only interpunct and asterisk will be
interpreted as crystal water delimiters, and a dot will be interpreted
as floating point delimiter in fractional stoichiometric coefficients."""
ref = {30: 1, 7: 2, 8: 12, 1: 12}
assert formula_to_composition('Zn(NO3)2{}6H2O'.format('\u00B7')) == ref
assert formula_to_composition('Zn(NO3)2*6H2O') == ref
# https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning
with pytest.deprecated_call():
assert formula_to_composition('Zn(NO3)2.6H2O') == ref

0 comments on commit d6782c8

Please sign in to comment.