From c7a9c1d1a23488a2e00fd5a80fcedba355a5b7e0 Mon Sep 17 00:00:00 2001 From: Bjorn Date: Mon, 22 Apr 2024 15:47:51 +0200 Subject: [PATCH] Restore old crystal water parsing in v0.8.x (#229) * Add (currently failing) test for crystal water parsing for 0.8.x * latest cython * apply old parsing behavior to 0.8.x * tweak tests in 0.8.x * linting --- .woodpecker.yaml | 1 + README.rst | 2 +- chempy/util/parsing.py | 40 ++++++++-- chempy/util/tests/test_parsing.py | 118 +++++++++++++++++------------- 4 files changed, 102 insertions(+), 59 deletions(-) diff --git a/.woodpecker.yaml b/.woodpecker.yaml index 3aca31ce..115497fb 100644 --- a/.woodpecker.yaml +++ b/.woodpecker.yaml @@ -27,6 +27,7 @@ steps: - export CPATH=$SUNDBASE/include:$CPATH - export LIBRARY_PATH=$SUNDBASE/lib - export LD_LIBRARY_PATH=$SUNDBASE/lib + - python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user --upgrade-strategy=eager --upgrade cython - python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user -e .[all] - python3 -c "import pycvodes; import pyodesys; import pygslodeiv2" # debug this CI config - git fetch -tq diff --git a/README.rst b/README.rst index 74294e81..a16c85af 100644 --- a/README.rst +++ b/README.rst @@ -385,7 +385,7 @@ If you make use of ChemPy in e.g. academic work you may cite the following peer- Depending on what underlying solver you are using you should also cite the appropriate paper (you can look at the list of references in the JOSS article). If you need to reference, in addition to the paper, a specific point version of ChemPy (for e.g. reproducibility) -you can get per-version DOIs from the zendodo archive: +you can get per-version DOIs from the zenodo archive: .. 
image:: https://zenodo.org/badge/8840/bjodah/chempy.svg :target: https://zenodo.org/badge/latestdoi/8840/bjodah/chempy diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index 0313545c..aca30723 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -5,8 +5,9 @@ from collections import defaultdict import re +import warnings -from .pyutil import memoize +from .pyutil import memoize, ChemPyDeprecationWarning from .periodic import symbols parsing_library = "pyparsing" # info used for selective testing. @@ -95,7 +96,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: '.' count? formula + hydrate :: ( '.' | '\u00B7' | '*' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? @@ -114,7 +115,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: '..' count? formula + hydrate :: ( '..' | '\u00B7' | '*' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? """ @@ -334,7 +335,7 @@ def _parse_stoich(stoich): _unicode_mapping = {k + "-": v + "-" for k, v in zip(_greek_letters, _greek_u)} _unicode_mapping["."] = "⋅" -_unicode_infix_mapping = {"..": "·"} +_unicode_infix_mapping = {"..": "\u00b7"} _html_mapping = {k + "-": "&" + k + ";-" for k in _greek_letters} _html_mapping["."] = "⋅" @@ -377,7 +378,7 @@ def formula_to_composition( True >>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1} True - >>> formula_to_composition('Na2CO3..7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} + >>> formula_to_composition('Na2CO3*7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} True """ @@ -386,7 +387,19 @@ def formula_to_composition( stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2] tot_comp = {} - parts = stoich_tok.split("..") + if ".." 
in stoich_tok: + parts = stoich_tok.split("..") + elif "\u00b7" in stoich_tok: + parts = stoich_tok.split('\u00b7') + elif '.' in stoich_tok: + warnings.warn( + ("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes." + " Dot will be interpreted as floating point in chempy-0.9+"), + ChemPyDeprecationWarning + ) + parts = stoich_tok.split('.') + else: + parts = list(filter(len, internal_asterisk.split(stoich_tok))) for idx, stoich in enumerate(parts): if idx == 0: @@ -523,6 +536,9 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs): ) +internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)") + + def _formula_to_format( sub, sup, @@ -532,7 +548,17 @@ def _formula_to_format( suffixes=("(s)", "(l)", "(g)", "(aq)"), ): parts = _formula_to_parts(formula, prefixes.keys(), suffixes) - stoichs = parts[0].split("..") + parts0 = parts[0].replace("..", "\u00B7") + parts0 = internal_asterisk.sub("\u00B7", parts0) + if '.' in parts0: + warnings.warn( + ("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes." 
+ " Dot will be interpreted as floating point in chempy-0.9+"), + ChemPyDeprecationWarning + ) + parts0 = parts0.replace('.', "\u00B7") + stoichs = parts0.split("\u00B7") + string = "" for idx, stoich in enumerate(stoichs): if idx == 0: diff --git a/chempy/util/tests/test_parsing.py b/chempy/util/tests/test_parsing.py index 67386278..d6418e67 100644 --- a/chempy/util/tests/test_parsing.py +++ b/chempy/util/tests/test_parsing.py @@ -303,45 +303,46 @@ def test_formula_to_composition_bad_complexes(species): formula_to_composition(species) -@pytest.mark.parametrize( - "species, composition", - [ - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6", - { - 6: 6, - 8: 18, - 12: 5.395, - 20: 2.832, - 26: 0.6285, - }, - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", - { - 6: 6, - 8: 18, - 12: 5.395, - 20: 2.832, - 26: 0.6285, - }, - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", - { - 1: 16, - 6: 6, - 8: 26, - 12: 5.395, - 20: 2.832, - 26: 0.6285, - }, - ), - ], -) -@requires(parsing_library) -def test_formula_to_composition_fractional_subscripts(species, composition): - assert formula_to_composition(species) == composition +# This test is enabled in chempy-0.9+ +# @pytest.mark.parametrize( +# "species, composition", +# [ +# ( +# "Ca2.832Fe0.6285Mg5.395(CO3)6", +# { +# 6: 6, +# 8: 18, +# 12: 5.395, +# 20: 2.832, +# 26: 0.6285, +# }, +# ), +# ( +# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", +# { +# 6: 6, +# 8: 18, +# 12: 5.395, +# 20: 2.832, +# 26: 0.6285, +# }, +# ), +# ( +# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", +# { +# 1: 16, +# 6: 6, +# 8: 26, +# 12: 5.395, +# 20: 2.832, +# 26: 0.6285, +# }, +# ), +# ], +# ) +# @requires(parsing_library) +# def test_formula_to_composition_fractional_subscripts(species, composition): +# assert formula_to_composition(species) == composition @pytest.mark.parametrize( @@ -535,18 +536,19 @@ def test_to_reaction(): ), ("[Fe(CN)6]-3", r"[Fe(CN)_{6}]^{3-}"), ("[Fe(CN)6]-3(aq)", r"[Fe(CN)_{6}]^{3-}(aq)"), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6", - 
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}", - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", - r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)", - ), - ( - "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", - r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)", - ), + # This test is enabled in chempy-0.9+: + # ( + # "Ca2.832Fe0.6285Mg5.395(CO3)6", + # r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}", + # ), + # ( + # "Ca2.832Fe0.6285Mg5.395(CO3)6(s)", + # r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)", + # ), + # ( + # "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)", + # r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)", + # ), ], ) @requires(parsing_library) @@ -712,3 +714,17 @@ def test_formula_to_html(species, html): def test_formula_to_html_caged(species, html): """Should produce HTML for cage species.""" assert formula_to_html(species) == html + + +def test_composition_dot_as_crystal_water_chempy08x(): + """In Chempy v0.8.x a dot will signify crystal water. But an asterisk '*' + or an interpunct (·) is also accepted (and preferred). + From Chempy v0.9.x onwards, only interpunct and asterisk will be + interpreted as crystal water delimiters, and a dot will be interpreted + as floating point delimiter in fractional stoichiometric coefficients.""" + ref = {30: 1, 7: 2, 8: 12, 1: 12} + assert formula_to_composition('Zn(NO3)2{}6H2O'.format('\u00B7')) == ref + assert formula_to_composition('Zn(NO3)2*6H2O') == ref + # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning + with pytest.deprecated_call(): + assert formula_to_composition('Zn(NO3)2.6H2O') == ref