diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 94c4dc226..d77a1ca19 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -4,7 +4,7 @@ The next release must bump the major version number. ## API changes -The column *exclude* of the `meta` attribute was refacored to a new attribute `exclude`. +The column *exclude* of the `meta` indicators was moved to a new attribute `exclude`. All validation methods are refactored such that the argument `exclude_on_fail` changes this new attribute (see PR [#759](https://github.com/IAMconsortium/pyam/pull/759)). @@ -12,8 +12,14 @@ The term "exclude" is now an illegal column name for (timeseries) data and meta When importing an xlsx file created with pyam < 2.0, which has an "exclude" column in "meta", that column is moved to the new exclude attribute with a log message. +PR [#764](https://github.com/IAMconsortium/pyam/pull/764) implemented a more restrictive +approach to exposing pyam internals at the package level, requiring an explicit +import of these methods. For example, use `pyam.utils.to_list()` +instead of `pyam.to_list()`. + ## Individual updates +- [#764](https://github.com/IAMconsortium/pyam/pull/764) Clean up the exposure of internal methods and attributes - [#763](https://github.com/IAMconsortium/pyam/pull/763) Implement a fix against carrying over unused levels when initializing from an indexed pandas object - [#759](https://github.com/IAMconsortium/pyam/pull/759) Excise "exclude" column from meta and add a own attribute - [#747](https://github.com/IAMconsortium/pyam/pull/747) Drop support for Python 3.7 diff --git a/docs/api/compute.rst b/docs/api/compute.rst index 8191b2135..75d17aadb 100644 --- a/docs/api/compute.rst +++ b/docs/api/compute.rst @@ -1,4 +1,4 @@ -.. currentmodule:: pyam +.. currentmodule:: pyam.compute Derived timeseries data ======================= diff --git a/docs/api/slice.rst b/docs/api/slice.rst index de2705d91..44ae010f7 100644 --- a/docs/api/slice.rst +++ b/docs/api/slice.rst @@ -1,4 +1,4 @@ -.. currentmodule:: pyam +.. currentmodule:: pyam.slice The **IamSlice** class ====================== diff --git a/docs/api/timeseries.rst b/docs/api/timeseries.rst index 17c6c74d3..32e416d84 100644 --- a/docs/api/timeseries.rst +++ b/docs/api/timeseries.rst @@ -1,4 +1,4 @@ -.. currentmodule:: pyam +.. currentmodule:: pyam.timeseries Timeseries functions ==================== diff --git a/docs/api/variables.rst b/docs/api/variables.rst index 200d65920..1ddb0abb4 100644 --- a/docs/api/variables.rst +++ b/docs/api/variables.rst @@ -1,12 +1,11 @@ -.. currentmodule:: pyam +.. currentmodule:: pyam.str Variables utilities =================== -The **variable** dimension of the |pyam| data format implements -implements a "semi-hierarchical" structure using the :code:`|` character -(*pipe*, not l or i) to indicate the *depth*. -Read the `data model documentation`_ for more information. +The **variable** dimension of the |pyam| data format implements a +"semi-hierarchical" structure using the :code:`|` character (*pipe*, not l or i) +to indicate the *depth*. Read the `data model documentation`_ for more information. .. _`data model documentation`: ../data.html#the-variable-column @@ -16,6 +15,8 @@ The package provides several functions to work with such strings. .. autofunction:: find_depth +.. autofunction:: get_variable_components + .. autofunction:: reduce_hierarchy -.. autofunction:: get_variable_components +.. 
autofunction:: is_str \ No newline at end of file diff --git a/docs/examples/plot_scatter.py b/docs/examples/plot_scatter.py index 9fd69a988..c4d2b5368 100644 --- a/docs/examples/plot_scatter.py +++ b/docs/examples/plot_scatter.py @@ -57,7 +57,7 @@ ) df.set_meta( - meta=co2.apply(pyam.cumulative, first_year=2020, last_year=2100, axis=1), + meta=co2.apply(pyam.timeseries.cumulative, first_year=2020, last_year=2100, axis=1), name="cumulative_co2", ) diff --git a/docs/tutorials/aggregating_downscaling_consistency.ipynb b/docs/tutorials/aggregating_downscaling_consistency.ipynb index 8ad7940f2..48a369b4e 100644 --- a/docs/tutorials/aggregating_downscaling_consistency.ipynb +++ b/docs/tutorials/aggregating_downscaling_consistency.ipynb @@ -55,7 +55,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import pyam" + "from pyam import IamDataFrame" ] }, { @@ -73,7 +73,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pyam.IamDataFrame(data='tutorial_data_aggregating_downscaling.csv')" + "df = IamDataFrame(data='tutorial_data_aggregating_downscaling.csv')" ] }, { @@ -421,7 +421,7 @@ "metadata": {}, "outputs": [], "source": [ - "tutorial_df = pyam.IamDataFrame(pd.DataFrame([\n", + "tutorial_df = IamDataFrame(pd.DataFrame([\n", " ['World', 'Primary Energy', 'EJ/yr', 7, 15],\n", " ['World', 'Primary Energy|Coal', 'EJ/yr', 4, 11],\n", " ['World', 'Primary Energy|Wind', 'EJ/yr', 2, 4],\n", @@ -483,7 +483,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -497,7 +497,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/aggregating_variables_and_plotting_with_negative_values.ipynb b/docs/tutorials/aggregating_variables_and_plotting_with_negative_values.ipynb index 0372709e6..b2052adec 100644 --- a/docs/tutorials/aggregating_variables_and_plotting_with_negative_values.ipynb +++ b/docs/tutorials/aggregating_variables_and_plotting_with_negative_values.ipynb @@ -18,7 +18,7 @@ "import numpy as np\n", "import pandas as pd\n", "\n", - "import pyam" + "from pyam import IamDataFrame" ] }, { @@ -46,7 +46,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pyam.IamDataFrame(pd.DataFrame([\n", + "df = IamDataFrame(pd.DataFrame([\n", " ['IMG', 'a_scen', 'World', 'Emissions|CO2|Energy|Oil', 'Mt CO2/yr', 2, 3.2, 2.0, 1.8],\n", " ['IMG', 'a_scen', 'World', 'Emissions|CO2|Energy|Gas', 'Mt CO2/yr', 1.3, 1.6, 1.0, 0.7],\n", " ['IMG', 'a_scen', 'World', 'Emissions|CO2|Energy|BECCS', 'Mt CO2/yr', 0.0, 0.4, -0.4, 0.3],\n", @@ -164,7 +164,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pyam.IamDataFrame(pd.DataFrame([\n", + "df = IamDataFrame(pd.DataFrame([\n", " ['IMG', 'a_scen', 'World', 'Emissions|CO2', 'Mt CO2/yr', 4.6, 5.3, 5.5, 4.3],\n", " ['IMG', 'a_scen', 'World', 'Emissions|CO2|Fossil', 'Mt CO2/yr', 4.0, 4.6, 4.9, 4.1],\n", " ['IMG', 'a_scen', 'World', 'Emissions|CO2|AFOLU', 'Mt CO2/yr', 0.6, 0.7, 0.6, 0.2],\n", @@ -235,7 +235,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Even if there are sectors which are defined only at the world level (e.g. `Emissions|CO2|Fossil|Aviation` in our example), Pyam will find them and include them when calculating the regional total if we specify `components=True` when using `aggregate_region`." + "Even if there are sectors which are defined only at the world level (e.g. 
`Emissions|CO2|Fossil|Aviation` in our example), **pyam** will find them and include them when calculating the regional total if we specify `components=True` when using `aggregate_region`." ] }, { @@ -300,7 +300,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -314,7 +314,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/algebraic_operations.ipynb b/docs/tutorials/algebraic_operations.ipynb index 5651aa07d..810c70d4d 100644 --- a/docs/tutorials/algebraic_operations.ipynb +++ b/docs/tutorials/algebraic_operations.ipynb @@ -50,7 +50,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import pyam" + "from pyam import IamDataFrame" ] }, { @@ -68,7 +68,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pyam.IamDataFrame(data='tutorial_data_aggregating_downscaling.csv')\n", + "df = IamDataFrame(data='tutorial_data_aggregating_downscaling.csv')\n", "df" ] }, @@ -334,7 +334,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -348,7 +348,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/data_table_formats.ipynb b/docs/tutorials/data_table_formats.ipynb index b57e86421..b3508b78f 100644 --- a/docs/tutorials/data_table_formats.ipynb +++ b/docs/tutorials/data_table_formats.ipynb @@ -47,7 +47,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import pyam" + "from pyam import IamDataFrame, IAMC_IDX" ] }, { @@ -74,7 +74,7 @@ " ['model_a', 'scen_a', 'World', 'Primary Energy|Coal', 'EJ/y', 0.5, 3],\n", " ['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2, 7],\n", "],\n", - " columns=pyam.IAMC_IDX + [2005, 2010],\n", + " columns=IAMC_IDX + [2005, 2010],\n", ")\n", "\n", "SIMPLE_DF" @@ -86,7 +86,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_simple = pyam.IamDataFrame(SIMPLE_DF)" + "df_simple = IamDataFrame(SIMPLE_DF)" ] }, { @@ -132,7 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_missing_cols = pyam.IamDataFrame(MISSING_COLS_DF, model='model_a')" + "df_missing_cols = IamDataFrame(MISSING_COLS_DF, model='model_a')" ] }, { @@ -169,7 +169,7 @@ " ['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2005, 2],\n", " ['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2010, 7],\n", "],\n", - " columns=pyam.IAMC_IDX + ['year', 'value'],\n", + " columns=IAMC_IDX + ['year', 'value'],\n", ")\n", "\n", "LONG_DF" @@ -181,7 +181,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_long = pyam.IamDataFrame(LONG_DF)" + "df_long = IamDataFrame(LONG_DF)" ] }, { @@ -217,8 +217,7 @@ " ['model_a', 'scen_b', 'World', 'EJ/y', 2005, 2, None],\n", " ['model_a', 'scen_b', 'World', 'EJ/y', 2010, 7, None]\n", "],\n", - " columns=['model', 'scenario', 'region', 'unit', 'year',\n", - " 'Primary Energy', 'Primary Energy|Coal'],\n", + " columns=['model', 'scenario', 'region', 'unit', 'year', 'Primary Energy', 'Primary Energy|Coal'],\n", ")\n", "\n", "VALUE_COLS_DF" @@ -230,8 +229,10 @@ "metadata": {}, "outputs": [], "source": [ - "df_value_cols = pyam.IamDataFrame(VALUE_COLS_DF,\n", - " value=['Primary Energy', 'Primary Energy|Coal'])" + "df_value_cols = IamDataFrame(\n", + " VALUE_COLS_DF,\n", + " value=['Primary Energy', 
'Primary Energy|Coal']\n", + ")" ] }, { @@ -277,7 +278,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_other_header = pyam.IamDataFrame(OTHER_HEADER_DF, scenario='foo')" + "df_other_header = IamDataFrame(OTHER_HEADER_DF, scenario='foo')" ] }, { @@ -324,7 +325,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_concat = pyam.IamDataFrame(CONCAT_DF, variable=['var_1', 'var_2'])" + "df_concat = IamDataFrame(CONCAT_DF, variable=['var_1', 'var_2'])" ] }, { @@ -379,9 +380,14 @@ "outputs": [], "source": [ "df_complicated = (\n", - " pyam.IamDataFrame(COMPLICATED_DF, model='model_a', region='iso',\n", - " value=['primary', 'coal'])\n", - " .rename(variable={'primary': 'Primary Energy', 'coal': 'Primary Energy|Coal'})\n", + " IamDataFrame(\n", + " COMPLICATED_DF,\n", + " model='model_a',\n", + " region='iso',\n", + " value=['primary', 'coal']\n", + " ).rename(\n", + " variable={'primary': 'Primary Energy', 'coal': 'Primary Energy|Coal'}\n", + " )\n", ")" ] }, @@ -423,7 +429,7 @@ " ['model_a', 'scen_a', 'World', 'Primary Energy|Coal', 'EJ/y', 2.1, 0.5, 3],\n", " ['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2.1, 2, 7],\n", "],\n", - " columns=pyam.IAMC_IDX + ['version', 2005, 2010],\n", + " columns=IAMC_IDX + ['version', 2005, 2010],\n", ")\n", "\n", "CUSTOM_COL_DF" @@ -435,7 +441,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_custom_col = pyam.IamDataFrame(CUSTOM_COL_DF)" + "df_custom_col = IamDataFrame(CUSTOM_COL_DF)" ] }, { @@ -450,7 +456,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -464,7 +470,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/iiasa_dbs.ipynb b/docs/tutorials/iiasa_dbs.ipynb index 8c5fb656e..8e1d23535 100644 --- a/docs/tutorials/iiasa_dbs.ipynb +++ b/docs/tutorials/iiasa_dbs.ipynb @@ -300,7 +300,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -314,7 +314,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/legends.ipynb b/docs/tutorials/legends.ipynb index e2d6b4d79..bccf1f360 100644 --- a/docs/tutorials/legends.ipynb +++ b/docs/tutorials/legends.ipynb @@ -17,7 +17,7 @@ "metadata": {}, "outputs": [], "source": [ - "import pyam" + "from pyam import IamDataFrame" ] }, { @@ -27,7 +27,7 @@ "outputs": [], "source": [ "df = (\n", - " pyam.IamDataFrame(data='tutorial_data.csv')\n", + " IamDataFrame(data='tutorial_data.csv')\n", " .filter(variable='Emissions|CO2', region='World')\n", ")\n", "\n", @@ -120,7 +120,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -134,7 +134,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/pyam_logo.ipynb b/docs/tutorials/pyam_logo.ipynb index 5a76dd435..db324143c 100644 --- a/docs/tutorials/pyam_logo.ipynb +++ b/docs/tutorials/pyam_logo.ipynb @@ -20,7 +20,7 @@ "outputs": [], "source": [ "import itertools\n", - "import pyam\n", + "from pyam import IamDataFrame, IAMC_IDX\n", "\n", "import pandas as pd\n", "import numpy as np\n", @@ -49,7 +49,7 
@@ "source": [ "combinations = itertools.product(['m1', 'm2', 'm3', 'm4'], ['s1', 's2', 's3'])\n", "data = [[m, s] + ['r', 'v', 'u'] + list(func(x, 0.5 + 0.1 * i)) for i, (m, s) in enumerate(combinations)]\n", - "df = pyam.IamDataFrame(pd.DataFrame(data, columns=pyam.IAMC_IDX + list(range(len(x)))))" + "df = IamDataFrame(pd.DataFrame(data, columns=IAMC_IDX + list(range(len(x)))))" ] }, { @@ -81,7 +81,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -95,7 +95,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/quantiles.ipynb b/docs/tutorials/quantiles.ipynb index 6a441c42c..cf2af6bd1 100644 --- a/docs/tutorials/quantiles.ipynb +++ b/docs/tutorials/quantiles.ipynb @@ -15,12 +15,11 @@ "metadata": {}, "outputs": [], "source": [ - "import pyam\n", - "\n", "import numpy as np\n", "import pandas as pd\n", + "from matplotlib import pyplot as plt\n", "\n", - "from matplotlib import pyplot as plt" + "from pyam import IamDataFrame" ] }, { @@ -41,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pyam.IamDataFrame(data='tutorial_data.csv')\n", + "df = IamDataFrame(data='tutorial_data.csv')\n", "df.timeseries().head()" ] }, diff --git a/docs/tutorials/subannual_time_resolution.ipynb b/docs/tutorials/subannual_time_resolution.ipynb index 3ea472927..5c33036e7 100644 --- a/docs/tutorials/subannual_time_resolution.ipynb +++ b/docs/tutorials/subannual_time_resolution.ipynb @@ -33,7 +33,8 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import pyam" + "\n", + "from pyam import IamDataFrame" ] }, { @@ -51,7 +52,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pyam.IamDataFrame(data='tutorial_data_subannual_time.csv')" + "df = IamDataFrame(data='tutorial_data_subannual_time.csv')" ] }, { @@ -119,7 +120,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -133,7 +134,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/tutorials/unit_conversion.ipynb b/docs/tutorials/unit_conversion.ipynb index 9d5e38d28..51d731c1a 100644 --- a/docs/tutorials/unit_conversion.ipynb +++ b/docs/tutorials/unit_conversion.ipynb @@ -48,7 +48,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import pyam" + "from pyam import IamDataFrame, IAMC_IDX" ] }, { @@ -73,10 +73,10 @@ " ['MESSAGEix-GLOBIOM 1.0', 'CD-LINKS_NPi', 'World', 'Primary Energy', 'EJ/yr', 500.74, 636.79, 809.93, 1284.78],\n", " ['MESSAGEix-GLOBIOM 1.0', 'CD-LINKS_NPi', 'World', 'Emissions|CH4', 'Mt CH4/yr', 327.92, 354.35, 377.88, 403.98],\n", "],\n", - " columns=pyam.IAMC_IDX + [2010, 2030, 2050, 2100],\n", + " columns=IAMC_IDX + [2010, 2030, 2050, 2100],\n", ")\n", "\n", - "df = pyam.IamDataFrame(UNIT_DF)\n", + "df = IamDataFrame(UNIT_DF)\n", "df.timeseries()" ] }, @@ -274,7 +274,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -288,9 +288,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/pyam/__init__.py b/pyam/__init__.py 
index 8b9357ba1..6abb0787b 100644 --- a/pyam/__init__.py +++ b/pyam/__init__.py @@ -3,17 +3,25 @@ from pathlib import Path from setuptools_scm import get_version -from pyam.core import * -from pyam.slice import IamSlice # noqa: F401 -from pyam.utils import * -from pyam.statistics import * -from pyam.timeseries import * +from pyam.core import ( + IamDataFrame, + categorize, + check_aggregate, + compare, + concat, + filter_by_meta, + require_variable, + read_datapackage, + validate, +) +from pyam.statistics import Statistics from pyam.logging import * -from pyam.run_control import * from pyam.iiasa import read_iiasa, lazy_read_iiasa # noqa: F401 from pyam.datareader import read_worldbank # noqa: F401 from pyam.unfccc import read_unfccc # noqa: F401 from pyam.testing import assert_iamframe_equal # noqa: F401 +from pyam.run_control import run_control # noqa: F401 +from pyam.utils import IAMC_IDX # noqa: F401 from pyam.logging import defer_logging_config diff --git a/pyam/aggregation.py b/pyam/aggregation.py index 0785ed750..b571186cd 100644 --- a/pyam/aggregation.py +++ b/pyam/aggregation.py @@ -5,14 +5,8 @@ from pyam.index import replace_index_values from pyam.logging import adjust_log_level -from pyam.utils import ( - islistable, - isstr, - find_depth, - reduce_hierarchy, - KNOWN_FUNCS, - to_list, -) +from pyam.str import find_depth, is_str, reduce_hierarchy +from pyam.utils import KNOWN_FUNCS, is_list_like, to_list from pyam._compare import _compare @@ -24,13 +18,13 @@ def _aggregate(df, variable, components=None, method=np.sum): if components is not None: # ensure that components is a proper list (not a dictionary) - if not islistable(components) or isinstance(components, dict): + if not is_list_like(components) or isinstance(components, dict): raise ValueError( f"Value for `components` must be a list, found: {components}" ) # list of variables require default components (no manual list) - if islistable(variable): + if is_list_like(variable): raise NotImplementedError( "Aggregating by list of variables does not support `components`." ) @@ -38,7 +32,7 @@ def _aggregate(df, variable, components=None, method=np.sum): mapping = {} msg = "Cannot aggregate variable '{}' because it has no components!" # if single variable - if isstr(variable): + if is_str(variable): # default components to all variables one level below `variable` components = components or df._variable_components(variable) @@ -51,7 +45,7 @@ def _aggregate(df, variable, components=None, method=np.sum): # else, use all variables one level below `variable` as components else: - for v in variable if islistable(variable) else [variable]: + for v in variable if is_list_like(variable) else [variable]: _components = df._variable_components(v) if not len(_components): logger.info(msg.format(v)) @@ -113,7 +107,7 @@ def _aggregate_region( drop_negative_weights=True, ): """Internal implementation for aggregating data over subregions""" - if not isstr(variable) and components is not False: + if not is_str(variable) and components is not False: raise ValueError( "Aggregating by list of variables with components is not supported!" 
) @@ -241,7 +235,7 @@ def _agg_weight(data, weight, method, drop_negative_weights): def _get_method_func(method): """Translate a string to a known method""" - if not isstr(method): + if not is_str(method): return method if method in KNOWN_FUNCS: diff --git a/pyam/core.py b/pyam/core.py index 22bdc88cf..53d9ea147 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -24,6 +24,7 @@ HAS_DATAPACKAGE = False from pyam.run_control import run_control +from pyam.str import find_depth, is_str from pyam.utils import ( write_sheet, read_file, @@ -31,11 +32,9 @@ format_data, merge_meta, merge_exclude, - find_depth, pattern_match, to_list, - isstr, - islistable, + is_list_like, print_list, s, DEFAULT_META_INDEX, @@ -168,7 +167,7 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): _data = format_data(data, index=index, **kwargs) # unsupported `data` args - elif islistable(data): + elif is_list_like(data): raise ValueError( "Initializing from list is not supported, " "use `IamDataFrame.append()` or `pyam.concat()`" @@ -235,7 +234,7 @@ def _finalize(self, data, append, **args): return IamDataFrame(data, meta=self.meta, **args) def __getitem__(self, key): - _key_check = [key] if isstr(key) else key + _key_check = [key] if is_str(key) else key if isinstance(key, IamSlice): return IamDataFrame(self._data.loc[key]) elif key == "value": @@ -628,8 +627,8 @@ def pivot_table( Output style for pivot table formatting, accepts 'highlight_not_max', 'heatmap' """ - index = [index] if isstr(index) else index - columns = [columns] if isstr(columns) else columns + index = [index] if is_str(index) else index + columns = [columns] if is_str(columns) else columns if values != "value": raise ValueError("This method only supports `values='value'`!") @@ -637,7 +636,7 @@ def pivot_table( df = self._data # allow 'aggfunc' to be passed as string for easier user interface - if isstr(aggfunc): + if is_str(aggfunc): if aggfunc == "count": df = self._data.groupby(index + columns).count() fill_value = 0 @@ -865,7 +864,7 @@ def set_meta(self, meta, name=None, index=None): # if no valid index is provided, add meta as new column `name` and exit if index is None: - self.meta[name] = list(meta) if islistable(meta) else meta + self.meta[name] = list(meta) if is_list_like(meta) else meta return # use meta.index if index arg is an IamDataFrame @@ -1848,7 +1847,7 @@ def slice(self, keep=True, **kwargs): Returns ------- - :class:`IamSlice` + :class:`pyam.slice.IamSlice` Notes ----- @@ -2415,14 +2414,14 @@ def to_excel( excel_writer : path-like, file-like, or ExcelWriter object File path as string or :class:`pathlib.Path`, or existing :class:`pandas.ExcelWriter`. - sheet_name : string + sheet_name : str, optional Name of sheet which will contain :meth:`IamDataFrame.timeseries` data. iamc_index : bool, optional If True, use `['model', 'scenario', 'region', 'variable', 'unit']`; else, use all :attr:`dimensions`. See :meth:`IamDataFrame.timeseries` for details. - include_meta : boolean or string, optional - If True, write :attr:`IamDataFrame.meta` to a sheet 'meta' (default); + include_meta : bool or str, optional + If True, write :attr:`meta` to a sheet 'meta' (default); if this is a string, use it as sheet name. **kwargs Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like) @@ -2965,7 +2964,7 @@ def concat(objs, ignore_meta_conflict=False, **kwargs): The :attr:`dimensions` and :attr:`index` names of all elements of *dfs* must be identical. The returned IamDataFrame inherits the dimensions and index names. 
""" - if not islistable(objs) or isinstance(objs, pd.DataFrame): + if not is_list_like(objs) or isinstance(objs, pd.DataFrame): raise TypeError(f"'{objs.__class__.__name__}' object is not iterable") objs = list(objs) diff --git a/pyam/iiasa.py b/pyam/iiasa.py index 651468cbe..e4303a050 100644 --- a/pyam/iiasa.py +++ b/pyam/iiasa.py @@ -15,12 +15,12 @@ import pandas as pd from pyam.core import IamDataFrame +from pyam.str import is_str from pyam.utils import ( META_IDX, IAMC_IDX, - isstr, pattern_match, - islistable, + is_list_like, ) from pyam.logging import deprecation_warning @@ -84,7 +84,7 @@ def __init__(self, creds: str = None, auth_url: str = _AUTH_URL): self.creds = _read_config(DEFAULT_IIASA_CREDS) else: self.creds = None - elif isinstance(creds, Path) or isstr(creds): + elif isinstance(creds, Path) or is_str(creds): self.creds = _read_config(creds) else: raise DeprecationWarning( @@ -423,7 +423,7 @@ def _query_post(self, meta, default_only=True, **kwargs): def _get_kwarg(k): # TODO refactor API to return all models if model-list is empty x = kwargs.pop(k, "*" if k == "model" else []) - return [x] if isstr(x) else x + return [x] if is_str(x) else x m_pattern = _get_kwarg("model") s_pattern = _get_kwarg("scenario") @@ -527,7 +527,7 @@ def query(self, default_only=True, meta=True, **kwargs): if meta: _meta = self.meta(default_only=default_only, run_id=True) # downselect to subset of meta columns if given as list - if islistable(meta): + if is_list_like(meta): # always merge 'version' (even if not requested explicitly) # 'run_id' is required to determine `_args`, dropped later _meta = _meta[list(set(meta).union(["version", "run_id"]))] @@ -578,7 +578,7 @@ def query(self, default_only=True, meta=True, **kwargs): # merge meta indicators (if requested) and cast to IamDataFrame if meta: # 'run_id' is necessary to retrieve data, not returned by default - if not (islistable(meta) and "run_id" in meta): + if not (is_list_like(meta) and "run_id" in meta): _meta.drop(columns="run_id", inplace=True) return IamDataFrame(data, meta=_meta, index=index) else: diff --git a/pyam/plotting.py b/pyam/plotting.py index c98377042..4a9e442c5 100644 --- a/pyam/plotting.py +++ b/pyam/plotting.py @@ -13,12 +13,12 @@ from pyam.run_control import run_control from pyam.figures import sankey from pyam.timeseries import cross_threshold +from pyam.str import is_str from pyam.utils import ( META_IDX, IAMC_IDX, SORT_IDX, YEAR_IDX, - isstr, to_list, ) from pyam.logging import raise_data_error @@ -177,7 +177,7 @@ def mpl_args_to_meta_cols(df, **kwargs): """Return the kwargs values (not keys) matching a `df.meta` column name""" cols = set() for arg, value in kwargs.items(): - if isstr(value) and value in df.meta.columns: + if is_str(value) and value in df.meta.columns: cols.add(value) return list(cols) diff --git a/pyam/run_control.py b/pyam/run_control.py index 1e90585da..2c047137e 100644 --- a/pyam/run_control.py +++ b/pyam/run_control.py @@ -3,7 +3,7 @@ import yaml from collections.abc import Mapping -from pyam.utils import isstr +from pyam.str import is_str # user-defined defaults for various plot settings _RUN_CONTROL = None @@ -119,9 +119,9 @@ def _load_yaml(self, obj): check_rel_paths = False if hasattr(obj, "read"): # it's a file obj = obj.read() - if isstr(obj) and not os.path.exists(obj): + if is_str(obj) and not os.path.exists(obj): raise IOError("File {} does not exist".format(obj)) - if isstr(obj) and os.path.exists(obj): + if is_str(obj) and os.path.exists(obj): check_rel_paths = True fname = obj with 
open(fname) as f: diff --git a/pyam/statistics.py b/pyam/statistics.py index c78604a3a..a7e0542ba 100644 --- a/pyam/statistics.py +++ b/pyam/statistics.py @@ -1,8 +1,10 @@ from copy import deepcopy import numpy as np import pandas as pd -from pyam import filter_by_meta, META_IDX -from pyam.utils import isstr, islistable + +from pyam import filter_by_meta +from pyam.str import is_str +from pyam.utils import is_list_like, META_IDX class Statistics(object): @@ -35,7 +37,7 @@ def __init__( # assing `groupby` settings and check that specifications are valid self.col = None self.groupby = None - if isstr(groupby): + if is_str(groupby): self.col = groupby self.groupby = {groupby: None} elif isinstance(groupby, dict) and len(groupby) == 1: @@ -67,14 +69,14 @@ def __init__( # assing `filters` settings and check that specifications are valid for idx, _filter in self.filters: # check that index in tuple is valid - if isstr(idx): + if is_str(idx): self._add_to_index(idx) else: if not ( isinstance(idx, tuple) and len(idx) == 2 - and isstr(idx[0]) - or not isstr(idx[1]) + and is_str(idx[0]) + or not is_str(idx[1]) ): raise ValueError("`{}` is not a valid index".format(idx)) self._add_to_index(idx[0], idx[1]) @@ -123,7 +125,7 @@ def _add_to_index(self, idx, sub_idx=None): def _add_to_header(self, header, subheader): if header not in self._headers: self._headers.append(header) - if islistable(subheader): + if is_list_like(subheader): for s in subheader: if s not in self._subheaders: self._subheaders.append(s) diff --git a/pyam/str.py b/pyam/str.py new file mode 100644 index 000000000..80b1aae06 --- /dev/null +++ b/pyam/str.py @@ -0,0 +1,126 @@ +from pandas.api.types import is_list_like +import numpy as np +import six +import re + + +def concat_with_pipe(x, cols=None): + """Concatenate a pandas.Series x using ``|``, drop None or numpy.nan""" + cols = cols or x.index + return "|".join([x[i] for i in cols if x[i] not in [None, np.nan]]) + + +def find_depth(data, s="", level=None): + """Return or assert the depth (number of ``|``) of variables + + Parameters + ---------- + data : str or list of strings + IAMC-style variables + s : str, default '' + remove leading `s` from any variable in `data` + level : int or str, optional + If None, return depth (number of ``|``); else, return list of booleans + whether depth satisfies the condition (equality if level is int, + >= if ``.+``, <= if ``.-``) + """ + if is_list_like(level): + raise ValueError( + "Level is only run with ints or strings, not lists. Use strings with " + "integers and + or - to filter by ranges." 
+ ) + if is_str(data): + return _find_depth([data], s, level)[0] + + return _find_depth(data, s, level) + + +def _find_depth(data, s="", level=None): + """Internal implementation of `find_depth()`""" + # remove wildcard as last character from string, escape regex characters + _s = re.compile("^" + escape_regexp(s.rstrip("*"))) + _p = re.compile("\\|") + + # find depth + def _count_pipes(val): + return len(_p.findall(re.sub(_s, "", val))) if _s.match(val) else None + + n_pipes = map(_count_pipes, data if is_list_like(data) else [data]) + + # if no level test is specified, return the depth as (list of) int + if level is None: + return list(n_pipes) + + # if `level` is given, set function for finding depth level =, >=, <= |s + if not is_str(level): + test = lambda x: level == x if x is not None else False + elif level[-1] == "-": + level = int(level[:-1]) + test = lambda x: level >= x if x is not None else False + elif level[-1] == "+": + level = int(level[:-1]) + test = lambda x: level <= x if x is not None else False + else: + raise ValueError("Unknown level type: `{}`".format(level)) + + return list(map(test, n_pipes)) + + +def get_variable_components(x, level, join=False): + """Return components for requested level in a list or join them in a str. + + Parameters + ---------- + x : str + Uses ``|`` to separate the components of the variable. + level : int or list of int + Position of the component. + join : bool or str, optional + If True, IAMC-style (``|``) is used as separator for joined components. + + Returns + ------- + str or list of str + """ + _x = x.split("|") + if join is False: + return [_x[i] for i in level] if is_list_like(level) else _x[level] + else: + level = [level] if type(level) == int else level + join = "|" if join is True else join + return join.join([_x[i] for i in level]) + + +def reduce_hierarchy(x, depth): + """Reduce the hierarchy (indicated by ``|``) of x to the specified depth + + Parameters + ---------- + x : str + Uses ``|`` to separate the components of the variable. 
+ depth : int + Depth (number of ``|``) of the returned variable; negative values are relative to the full depth of `x`. + + """ + _x = x.split("|") + depth = len(_x) + depth - 1 if depth < 0 else depth + return "|".join(_x[0 : (depth + 1)]) + + +def escape_regexp(s): + """Escape characters with specific regexp use""" + return ( + str(s) + .replace("|", "\\|") + .replace(".", "\.") # `.` has to be replaced before `*` + .replace("*", ".*") + .replace("+", "\+") + .replace("(", "\(") + .replace(")", "\)") + .replace("$", "\\$") + ) + + +def is_str(x): + """Returns True if x is a string""" + return isinstance(x, six.string_types) diff --git a/pyam/timeseries.py b/pyam/timeseries.py index 54d8f57d2..8e88cb082 100644 --- a/pyam/timeseries.py +++ b/pyam/timeseries.py @@ -2,7 +2,8 @@ import math import numpy as np import pandas as pd -from pyam.utils import isstr, to_int, raise_data_error +from pyam.str import is_str +from pyam.utils import to_int, raise_data_error logger = logging.getLogger(__name__) @@ -102,7 +103,7 @@ def cross_threshold( return_type : type, optional Whether to cast the returned values to integer (years) """ - direction = [direction] if isstr(direction) else list(direction) + direction = [direction] if is_str(direction) else list(direction) if not set(direction).issubset(set(["from above", "from below"])): raise ValueError(f"Invalid direction: {direction}") diff --git a/pyam/unfccc.py b/pyam/unfccc.py index da9f081af..21e4a725c 100644 --- a/pyam/unfccc.py +++ b/pyam/unfccc.py @@ -10,7 +10,8 @@ HAS_UNFCCC = False from pyam import IamDataFrame -from pyam.utils import pattern_match, isstr, to_list +from pyam.str import is_str +from pyam.utils import pattern_match, to_list # columns from UNFCCC data that can be used for variable names NAME_COLS = ["category", "classification", "measure", "gas"] @@ -128,7 +129,7 @@ def read_unfccc( # drop unspecified rows and columns, rename value column cols = ["party", "variable", "unit", "year", "gas", "numberValue"] - data = data.loc[[isstr(i) for i in data.variable], cols] + data = data.loc[[is_str(i) for i in data.variable], cols] data.rename(columns={"numberValue": "value"}, inplace=True) # append `gas` to unit, drop `gas` column diff --git a/pyam/utils.py b/pyam/utils.py index 0bb3722fb..4a8ab98d8 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -2,15 +2,15 @@ import itertools import logging import string -import six import re import dateutil - -from pyam.index import get_index_levels, replace_index_labels -from pyam.logging import raise_data_error import numpy as np import pandas as pd -from collections.abc import Iterable +from pandas.api.types import is_list_like, is_float + +from pyam.index import get_index_levels, replace_index_labels +from pyam.str import concat_with_pipe, escape_regexp, find_depth, is_str +from pyam.logging import raise_data_error, deprecation_warning logger = logging.getLogger(__name__) @@ -74,23 +74,30 @@ def wrapper(*args, **kwargs): def isstr(x): - """Returns True if x is a string""" - return isinstance(x, six.string_types) + # TODO deprecated, remove for release >= 2.1 + deprecation_warning("Please use `pyam.str.is_str()`.", "The function `isstr()`") + return is_str(x) def isscalar(x): - """Returns True if x is a scalar""" - return not isinstance(x, Iterable) or isstr(x) + # TODO deprecated, remove for release >= 2.1 + deprecation_warning( + "Please use `pandas.api.types.is_float()`.", "The function `isscalar()`" + ) + return is_float(x) def islistable(x): - """Returns True if x is a list but not a string""" - return isinstance(x, Iterable) and not isstr(x) + # TODO 
deprecated, remove for release >= 2.1 + deprecation_warning( + "Please use `pyam.utils.is_list_like()`.", "The function `islistable()`" + ) + return is_list_like(x) def to_list(x): """Return x as a list""" - return x if islistable(x) else [x] + return x if is_list_like(x) else [x] def remove_from_list(x, items): @@ -205,7 +212,7 @@ def _knead_data(df, **kwargs): # melt value columns and use column name as `variable` if "value" in kwargs and "variable" not in kwargs: value = kwargs.pop("value") - value = value if islistable(value) else [value] + value = value if is_list_like(value) else [value] _df = df.set_index(list(set(df.columns) - set(value))) dfs = [] for v in value: @@ -221,12 +228,12 @@ def _knead_data(df, **kwargs): if col in df: raise ValueError(f"Conflict of kwarg with column `{col}` in dataframe!") - if isstr(value) and value in df: + if is_str(value) and value in df: df.rename(columns={value: col}, inplace=True) - elif islistable(value) and all([c in df.columns for c in value]): + elif is_list_like(value) and all([c in df.columns for c in value]): df[col] = df.apply(lambda x: concat_with_pipe(x, value), axis=1) df.drop(value, axis=1, inplace=True) - elif isstr(value): + elif is_str(value): df[col] = value else: raise ValueError(f"Invalid argument for casting `{col}: {value}`") @@ -397,7 +404,7 @@ def format_data(df, index, **kwargs): # all lower case df.rename( - columns={c: str(c).lower() for c in df.columns if isstr(c)}, inplace=True + columns={c: str(c).lower() for c in df.columns if is_str(c)}, inplace=True ) if "notes" in df.columns: # this came from a legacy database (SSP or earlier) @@ -513,62 +520,6 @@ def merge_exclude(left, right, ignore_conflict=False): return pd.concat([left, right.loc[diff]], sort=False) -def find_depth(data, s="", level=None): - """Return or assert the depth (number of ``|``) of variables - - Parameters - ---------- - data : str or list of strings - IAMC-style variables - s : str, default '' - remove leading `s` from any variable in `data` - level : int or str, optional - If None, return depth (number of ``|``); else, return list of booleans - whether depth satisfies the condition (equality if level is int, - >= if ``.+``, <= if ``.-``) - """ - if islistable(level): - raise ValueError( - "Level is only run with ints or strings, not lists. Use strings with " - "integers and + or - to filter by ranges." 
- ) - if isstr(data): - return _find_depth([data], s, level)[0] - - return _find_depth(data, s, level) - - -def _find_depth(data, s="", level=None): - """Internal implementation of `find_depth()´""" - # remove wildcard as last character from string, escape regex characters - _s = re.compile("^" + _escape_regexp(s.rstrip("*"))) - _p = re.compile("\\|") - - # find depth - def _count_pipes(val): - return len(_p.findall(re.sub(_s, "", val))) if _s.match(val) else None - - n_pipes = map(_count_pipes, to_list(data)) - - # if no level test is specified, return the depth as (list of) int - if level is None: - return list(n_pipes) - - # if `level` is given, set function for finding depth level =, >=, <= |s - if not isstr(level): - test = lambda x: level == x if x is not None else False - elif level[-1] == "-": - level = int(level[:-1]) - test = lambda x: level >= x if x is not None else False - elif level[-1] == "+": - level = int(level[:-1]) - test = lambda x: level <= x if x is not None else False - else: - raise ValueError("Unknown level type: `{}`".format(level)) - - return list(map(test, n_pipes)) - - def pattern_match( data, values, level=None, regexp=False, has_nan=False, return_codes=False ): @@ -580,7 +531,7 @@ def pattern_match( """ codes = [] matches = np.zeros(len(data), dtype=bool) - values = values if islistable(values) else [values] + values = values if is_list_like(values) else [values] # issue (#40) with string-to-nan comparison, replace nan by empty string _data = data.fillna("") if has_nan else data @@ -593,8 +544,8 @@ def pattern_match( except KeyError: pass - if isstr(s): - pattern = re.compile(_escape_regexp(s) + "$" if not regexp else s) + if is_str(s): + pattern = re.compile(escape_regexp(s) + "$" if not regexp else s) depth = True if level is None else find_depth(_data, s, level) matches |= data.str.match(pattern) & depth else: @@ -607,20 +558,6 @@ def pattern_match( return matches -def _escape_regexp(s): - """Escape characters with specific regexp use""" - return ( - str(s) - .replace("|", "\\|") - .replace(".", "\.") # `.` has to be replaced before `*` - .replace("*", ".*") - .replace("+", "\+") - .replace("(", "\(") - .replace(")", "\)") - .replace("$", "\\$") - ) - - def print_list(x, n): """Return a printable string of a list shortened to n characters""" # if list is empty, only write count @@ -704,40 +641,6 @@ def to_int(x, index=False): return _x -def concat_with_pipe(x, cols=None): - """Concatenate a pandas.Series x using ``|``, drop None or numpy.nan""" - cols = cols or x.index - return "|".join([x[i] for i in cols if x[i] not in [None, np.nan]]) - - -def reduce_hierarchy(x, depth): - """Reduce the hierarchy (indicated by ``|``) of x to the specified depth""" - _x = x.split("|") - depth = len(_x) + depth - 1 if depth < 0 else depth - return "|".join(_x[0 : (depth + 1)]) - - -def get_variable_components(x, level, join=False): - """Return components for requested level in a list or join these in a str. - - Parameters - ---------- - x : str - Uses ``|`` to separate the components of the variable. - level : int or list of int - Position of the component. - join : bool or str, optional - If True, IAMC-style (``|``) is used as separator for joined components. 
- """ - _x = x.split("|") - if join is False: - return [_x[i] for i in level] if islistable(level) else _x[level] - else: - level = [level] if type(level) == int else level - join = "|" if join is True else join - return join.join([_x[i] for i in level]) - - def s(n): """Return an s if n!=1 for nicer formatting of log messages""" return "s" if n != 1 else "" diff --git a/tests/conftest.py b/tests/conftest.py index 92a08fef7..21a2d9864 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,8 @@ import pandas as pd from datetime import datetime -from pyam import IamDataFrame, META_IDX, IAMC_IDX, iiasa +from pyam import IamDataFrame, iiasa +from pyam.utils import META_IDX, IAMC_IDX # verify whether IIASA database API can be reached, skip tests otherwise diff --git a/tests/test_core.py b/tests/test_core.py index 698f1babc..22afd6014 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8,9 +8,10 @@ from numpy import testing as npt from pandas import testing as pdt -from pyam import IamDataFrame, filter_by_meta, META_IDX, IAMC_IDX, sort_data +from pyam import IamDataFrame, filter_by_meta from pyam.core import _meta_idx -from pyam.utils import isstr +from pyam.str import is_str +from pyam.utils import IAMC_IDX, META_IDX, sort_data from pyam.testing import assert_iamframe_equal @@ -534,7 +535,7 @@ def test_interpolate(test_pd_df): assert not df._data.index.duplicated().any() # assert that extra_col does not have nan's (check for #351) - assert all([True if isstr(i) else ~np.isnan(i) for i in df.foo]) + assert all([True if is_str(i) else ~np.isnan(i) for i in df.foo]) def test_interpolate_time_exists(test_df_year): diff --git a/tests/test_datareader.py b/tests/test_datareader.py index 879af2e8c..39e0c6598 100644 --- a/tests/test_datareader.py +++ b/tests/test_datareader.py @@ -3,7 +3,8 @@ import logging import pandas as pd -from pyam import IamDataFrame, IAMC_IDX, read_worldbank +from pyam import IamDataFrame, read_worldbank +from pyam.utils import IAMC_IDX from pyam.testing import assert_iamframe_equal from pandas_datareader import wb diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index 0adcf1220..eac3c5221 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -3,7 +3,8 @@ import numpy as np import pandas as pd -from pyam import check_aggregate, IamDataFrame, IAMC_IDX +from pyam import check_aggregate, IamDataFrame +from pyam.utils import IAMC_IDX from pyam.testing import assert_iamframe_equal from .conftest import DTS_MAPPING diff --git a/tests/test_feature_append_concat.py b/tests/test_feature_append_concat.py index 45f60dfb1..5284c8b17 100644 --- a/tests/test_feature_append_concat.py +++ b/tests/test_feature_append_concat.py @@ -5,7 +5,9 @@ import pandas.testing as pdt from datetime import datetime -from pyam import IamDataFrame, IAMC_IDX, META_IDX, assert_iamframe_equal, concat +from pyam import IamDataFrame, concat +from pyam.testing import assert_iamframe_equal +from pyam.utils import IAMC_IDX, META_IDX from .conftest import TEST_DF, META_COLS, META_DF diff --git a/tests/test_feature_compare.py b/tests/test_feature_compare.py index fa178fb51..3719606ac 100644 --- a/tests/test_feature_compare.py +++ b/tests/test_feature_compare.py @@ -2,7 +2,8 @@ import numpy as np import pandas as pd -from pyam import compare, IAMC_IDX +from pyam import compare +from pyam.utils import IAMC_IDX def test_compare(test_df): diff --git a/tests/test_feature_growth_rate.py b/tests/test_feature_growth_rate.py index 
d709afbb6..fa9ea3b61 100644 --- a/tests/test_feature_growth_rate.py +++ b/tests/test_feature_growth_rate.py @@ -1,7 +1,8 @@ import math import pandas as pd import pandas.testing as pdt -from pyam import IamDataFrame, IAMC_IDX +from pyam import IamDataFrame +from pyam.utils import IAMC_IDX from pyam.testing import assert_iamframe_equal from pyam.timeseries import growth_rate import pytest diff --git a/tests/test_feature_learning_rate.py b/tests/test_feature_learning_rate.py index afc117427..d674839ad 100644 --- a/tests/test_feature_learning_rate.py +++ b/tests/test_feature_learning_rate.py @@ -1,5 +1,6 @@ import pandas as pd -from pyam import IamDataFrame, IAMC_IDX +from pyam import IamDataFrame +from pyam.utils import IAMC_IDX from pyam.testing import assert_iamframe_equal import pytest diff --git a/tests/test_feature_rename.py b/tests/test_feature_rename.py index fd74175f4..19e98d034 100644 --- a/tests/test_feature_rename.py +++ b/tests/test_feature_rename.py @@ -5,7 +5,8 @@ import pandas as pd from numpy import testing as npt -from pyam import IamDataFrame, META_IDX, IAMC_IDX, compare +from pyam import IamDataFrame, compare +from pyam.utils import IAMC_IDX, META_IDX from pyam.testing import assert_iamframe_equal from .conftest import META_COLS diff --git a/tests/test_feature_validation.py b/tests/test_feature_validation.py index 89122c1e4..f5d19974e 100644 --- a/tests/test_feature_validation.py +++ b/tests/test_feature_validation.py @@ -2,7 +2,8 @@ import pandas.testing as pdt import pytest -from pyam import IamDataFrame, validate, categorize, require_variable, META_IDX +from pyam import IamDataFrame, validate, categorize, require_variable +from pyam.utils import META_IDX @pytest.mark.parametrize( diff --git a/tests/test_filter.py b/tests/test_filter.py index 8e1065a15..a20f080ca 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -6,7 +6,8 @@ import pandas.testing as pdt import pytest -from pyam import IamDataFrame, IAMC_IDX +from pyam import IamDataFrame +from pyam.utils import IAMC_IDX from .conftest import EXP_DATETIME_INDEX diff --git a/tests/test_iiasa.py b/tests/test_iiasa.py index d02314959..a98417fac 100644 --- a/tests/test_iiasa.py +++ b/tests/test_iiasa.py @@ -7,7 +7,8 @@ import numpy.testing as npt import yaml -from pyam import IamDataFrame, iiasa, lazy_read_iiasa, read_iiasa, META_IDX +from pyam import IamDataFrame, iiasa, lazy_read_iiasa, read_iiasa +from pyam.utils import META_IDX from pyam.testing import assert_iamframe_equal from .conftest import META_COLS, IIASA_UNAVAILABLE, TEST_API, TEST_API_NAME diff --git a/tests/test_index.py b/tests/test_index.py index 83afbd39d..b65e36813 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -8,7 +8,7 @@ replace_index_labels, append_index_level, ) -from pyam import IAMC_IDX +from pyam.utils import IAMC_IDX def test_get_index_levels(test_df_index): diff --git a/tests/test_ops.py b/tests/test_ops.py index 3ef764c6e..fbe60b9d1 100644 --- a/tests/test_ops.py +++ b/tests/test_ops.py @@ -4,7 +4,8 @@ import pint from iam_units import registry -from pyam import IamDataFrame, IAMC_IDX +from pyam import IamDataFrame +from pyam.utils import IAMC_IDX from pyam.testing import assert_iamframe_equal from pyam._ops import _op_data diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 42551d2b8..24ce15818 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -10,7 +10,8 @@ from contextlib import contextmanager import pyam -from pyam import plotting, run_control, reset_rc_defaults +from pyam import 
plotting +from pyam.run_control import run_control, reset_rc_defaults from .conftest import IMAGE_BASELINE_DIR diff --git a/tests/test_run_control.py b/tests/test_run_control.py index 3b1447aba..65c419233 100644 --- a/tests/test_run_control.py +++ b/tests/test_run_control.py @@ -1,7 +1,8 @@ import os import pytest -from pyam import IamDataFrame, run_control +from pyam import IamDataFrame +from pyam.run_control import run_control from .conftest import TEST_DATA_DIR, TEST_DF diff --git a/tests/test_string.py b/tests/test_string.py new file mode 100644 index 000000000..3c1981d4f --- /dev/null +++ b/tests/test_string.py @@ -0,0 +1,131 @@ +import numpy as np +import pandas as pd +import pytest + +from pyam.str import ( + find_depth, + get_variable_components, + reduce_hierarchy, + concat_with_pipe, +) + +TEST_VARS = ["foo", "foo|bar", "foo|bar|baz"] +TEST_CONCAT_SERIES = pd.Series(["foo", "bar", "baz"], index=["f", "b", "z"]) + + +def test_find_depth_as_list(): + obs = find_depth(TEST_VARS) + assert obs == [0, 1, 2] + + +def test_find_depth_as_str(): + assert find_depth("foo|bar|baz") == 2 + + +def test_find_depth_with_str(): + data = pd.Series(["foo", "foo|bar|baz", "bar|baz", "bar|baz|foo"]) + obs = find_depth(data, "bar") + assert obs == [None, None, 1, 2] + + +def test_find_depth_with_str_1(): + data = pd.Series(["foo", "foo|bar|baz", "bar|baz", "bar|baz|foo"]) + obs = find_depth(data, "bar|", 1) + assert obs == [False, False, False, True] + + +def test_find_depth_with_str_0(): + data = pd.Series(["foo", "foo|bar|baz", "bar|baz", "bar|baz|foo"]) + obs = find_depth(data, "*bar|", 0) + assert obs == [False, True, True, False] + + +def test_find_depth_0(): + obs = find_depth(TEST_VARS, level=0) + assert obs == [True, False, False] + + +def test_find_depth_0_minus(): + obs = find_depth(TEST_VARS, level="0-") + assert obs == [True, False, False] + + +def test_find_depth_0_plus(): + obs = find_depth(TEST_VARS, level="0+") + assert obs == [True, True, True] + + +def test_find_depth_1(): + obs = find_depth(TEST_VARS, level=1) + assert obs == [False, True, False] + + +def test_find_depth_1_minus(): + obs = find_depth(TEST_VARS, level="1-") + assert obs == [True, True, False] + + +def test_find_depth_1_plus(): + obs = find_depth(TEST_VARS, level="1+") + assert obs == [False, True, True] + + +def test_concat_with_pipe_all(): + obs = concat_with_pipe(TEST_CONCAT_SERIES) + assert obs == "foo|bar|baz" + + +def test_concat_with_pipe_exclude_none(): + s = TEST_CONCAT_SERIES.copy() + s["b"] = None + obs = concat_with_pipe(s) + assert obs == "foo|baz" + + +def test_concat_with_pipe_exclude_nan(): + s = TEST_CONCAT_SERIES.copy() + s["b"] = np.nan + obs = concat_with_pipe(s) + assert obs == "foo|baz" + + +def test_concat_with_pipe_by_name(): + obs = concat_with_pipe(TEST_CONCAT_SERIES, ["f", "z"]) + assert obs == "foo|baz" + + +def test_reduce_hierarchy_0(): + assert reduce_hierarchy("foo|bar|baz", 0) == "foo" + + +def test_reduce_hierarchy_1(): + assert reduce_hierarchy("foo|bar|baz", 1) == "foo|bar" + + +def test_reduce_hierarchy_neg1(): + assert reduce_hierarchy("foo|bar|baz", -1) == "foo|bar" + + +def test_reduce_hierarchy_neg2(): + assert reduce_hierarchy("foo|bar|baz", -2) == "foo" + + +def test_get_variable_components_int(): + assert get_variable_components("foo|bar|baz", 1) == "bar" + + +def test_get_variable_components_list(): + assert get_variable_components("foo|bar|baz", [1, 2]) == ["bar", "baz"] + + +def test_get_variable_components_indexError(): + with pytest.raises(IndexError): + 
get_variable_components("foo|bar|baz", 3) + + +def test_get_variable_components_join_true(): + assert get_variable_components("foo|bar|baz", [0, 2], join=True) == "foo|baz" + + +def test_get_variable_components_join_str(): + assert get_variable_components("foo|bar|baz", [2, 1], join="_") == "baz_bar" diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index 597289fb8..06a117108 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -5,9 +5,11 @@ import numpy as np import pandas as pd -from pyam import fill_series, cumulative, cross_threshold, to_int import pytest +from pyam.timeseries import fill_series, cumulative, cross_threshold +from pyam.utils import to_int + def test_fill_series(): # note that the series is not order and the index is defined as float diff --git a/tests/test_utils.py b/tests/test_utils.py index fe9c8b900..5ef3e6012 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,17 +5,19 @@ from pandas import Timestamp from datetime import datetime -from pyam import utils, META_IDX - -TEST_VARS = ["foo", "foo|bar", "foo|bar|baz"] -TEST_CONCAT_SERIES = pd.Series(["foo", "bar", "baz"], index=["f", "b", "z"]) +from pyam.utils import ( + META_IDX, + pattern_match, + merge_meta, + to_time, +) def test_pattern_match_none(): data = pd.Series(["foo", "bar"]) values = ["baz"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [False, False]).all() @@ -23,7 +25,7 @@ def test_pattern_match_nan(): data = pd.Series(["foo", np.nan]) values = ["baz"] - obs = utils.pattern_match(data, values, has_nan=True) + obs = pattern_match(data, values, has_nan=True) assert (obs == [False, False]).all() @@ -31,7 +33,7 @@ def test_pattern_match_one(): data = pd.Series(["foo", "bar"]) values = ["foo"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [True, False]).all() @@ -39,7 +41,7 @@ def test_pattern_match_str_regex(): data = pd.Series(["foo", "foo2", "bar"]) values = ["foo"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [True, False, False]).all() @@ -47,7 +49,7 @@ def test_pattern_match_ast_regex(): data = pd.Series(["foo", "foo2", "bar"]) values = ["foo*"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [True, True, False]).all() @@ -55,7 +57,7 @@ def test_pattern_match_ast2_regex(): data = pd.Series(["foo|bar", "foo", "bar"]) values = ["*o*b*"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [True, False, False]).all() @@ -63,7 +65,7 @@ def test_pattern_match_plus(): data = pd.Series(["foo", "foo+", "+bar", "b+az"]) values = ["*+*"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [False, True, True, True]).all() @@ -71,7 +73,7 @@ def test_pattern_match_dot(): data = pd.Series(["foo", "fo."]) values = ["fo."] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [False, True]).all() @@ -79,7 +81,7 @@ def test_pattern_match_brackets(): data = pd.Series(["foo (bar)", "foo bar"]) values = ["foo (bar)"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [True, False]).all() @@ -87,7 +89,7 @@ def test_pattern_match_dollar(): data = pd.Series(["foo$bar", "foo"]) values = ["foo$bar"] - obs = utils.pattern_match(data, values) + obs = pattern_match(data, values) assert (obs == [True, False]).all() @@ -95,107 +97,10 @@ def 
test_pattern_regexp(): data = pd.Series(["foo", "foa", "foo$"]) values = ["fo.$"] - obs = utils.pattern_match(data, values, regexp=True) + obs = pattern_match(data, values, regexp=True) assert (obs == [True, True, False]).all() -def test_find_depth_as_list(): - obs = utils.find_depth(TEST_VARS) - assert obs == [0, 1, 2] - - -def test_find_depth_as_str(): - assert utils.find_depth("foo|bar|baz") == 2 - - -def test_find_depth_with_str(): - data = pd.Series(["foo", "foo|bar|baz", "bar|baz", "bar|baz|foo"]) - obs = utils.find_depth(data, "bar") - assert obs == [None, None, 1, 2] - - -def test_find_depth_with_str_1(): - data = pd.Series(["foo", "foo|bar|baz", "bar|baz", "bar|baz|foo"]) - obs = utils.find_depth(data, "bar|", 1) - assert obs == [False, False, False, True] - - -def test_find_depth_with_str_0(): - data = pd.Series(["foo", "foo|bar|baz", "bar|baz", "bar|baz|foo"]) - obs = utils.find_depth(data, "*bar|", 0) - assert obs == [False, True, True, False] - - -def test_find_depth_0(): - obs = utils.find_depth(TEST_VARS, level=0) - assert obs == [True, False, False] - - -def test_find_depth_0_minus(): - obs = utils.find_depth(TEST_VARS, level="0-") - assert obs == [True, False, False] - - -def test_find_depth_0_plus(): - obs = utils.find_depth(TEST_VARS, level="0+") - assert obs == [True, True, True] - - -def test_find_depth_1(): - obs = utils.find_depth(TEST_VARS, level=1) - assert obs == [False, True, False] - - -def test_find_depth_1_minus(): - obs = utils.find_depth(TEST_VARS, level="1-") - assert obs == [True, True, False] - - -def test_find_depth_1_plus(): - obs = utils.find_depth(TEST_VARS, level="1+") - assert obs == [False, True, True] - - -def test_concat_with_pipe_all(): - obs = utils.concat_with_pipe(TEST_CONCAT_SERIES) - assert obs == "foo|bar|baz" - - -def test_concat_with_pipe_exclude_none(): - s = TEST_CONCAT_SERIES.copy() - s["b"] = None - obs = utils.concat_with_pipe(s) - assert obs == "foo|baz" - - -def test_concat_with_pipe_exclude_nan(): - s = TEST_CONCAT_SERIES.copy() - s["b"] = np.nan - obs = utils.concat_with_pipe(s) - assert obs == "foo|baz" - - -def test_concat_with_pipe_by_name(): - obs = utils.concat_with_pipe(TEST_CONCAT_SERIES, ["f", "z"]) - assert obs == "foo|baz" - - -def test_reduce_hierarchy_0(): - assert utils.reduce_hierarchy("foo|bar|baz", 0) == "foo" - - -def test_reduce_hierarchy_1(): - assert utils.reduce_hierarchy("foo|bar|baz", 1) == "foo|bar" - - -def test_reduce_hierarchy_neg1(): - assert utils.reduce_hierarchy("foo|bar|baz", -1) == "foo|bar" - - -def test_reduce_hierarchy_neg2(): - assert utils.reduce_hierarchy("foo|bar|baz", -2) == "foo" - - def test_merge_meta(): # test merging of two meta tables left = pd.DataFrame( @@ -214,7 +119,7 @@ def test_merge_meta(): ).set_index(META_IDX) # merge conflict raises an error - pytest.raises(ValueError, utils.merge_meta, left, right) + pytest.raises(ValueError, merge_meta, left, right) # merge conflict ignoring errors yields expected results exp = pd.DataFrame( @@ -226,31 +131,10 @@ def test_merge_meta(): columns=META_IDX + ["string", "value", "value2"], ).set_index(META_IDX) - obs = utils.merge_meta(left, right, ignore_conflict=True) + obs = merge_meta(left, right, ignore_conflict=True) pdt.assert_frame_equal(exp, obs) -def test_get_variable_components_int(): - assert utils.get_variable_components("foo|bar|baz", 1) == "bar" - - -def test_get_variable_components_list(): - assert utils.get_variable_components("foo|bar|baz", [1, 2]) == ["bar", "baz"] - - -def test_get_variable_components_indexError(): - with 
pytest.raises(IndexError): - utils.get_variable_components("foo|bar|baz", 3) - - -def test_get_variable_components_joinTRUE(): - assert utils.get_variable_components("foo|bar|baz", [0, 2], join=True) == "foo|baz" - - -def test_get_variable_components_joinstr(): - assert utils.get_variable_components("foo|bar|baz", [2, 1], join="_") == "baz_bar" - - @pytest.mark.parametrize( "x, exp", [ @@ -260,10 +144,10 @@ def test_get_variable_components_joinstr(): ], ) def test_to_time(x, exp): - assert utils.to_time(x) == exp + assert to_time(x) == exp @pytest.mark.parametrize("x", [2.5, "2010-07-10 foo"]) def test_to_time_raises(x): with pytest.raises(ValueError, match=f"Invalid time domain: {x}"): - utils.to_time(x) + to_time(x)
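
A minimal usage sketch of the import convention that PR #764 establishes (see the RELEASE_NOTES hunk at the top of this patch). The module paths are taken from this diff; the one-row sample data is an illustrative placeholder.

import pandas as pd

# top-level API objects remain importable from the package itself
from pyam import IamDataFrame

# internal helpers now require the fully qualified module path
from pyam.str import find_depth
from pyam.utils import IAMC_IDX, to_list

df = IamDataFrame(
    pd.DataFrame(
        [["model_a", "scen_a", "World", "Primary Energy|Coal", "EJ/yr", 0.5, 3]],
        columns=IAMC_IDX + [2005, 2010],
    )
)
assert to_list("foo") == ["foo"]  # previously reachable as `pyam.to_list()`
assert find_depth("Primary Energy|Coal") == 1  # previously exposed at the package level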
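A short sketch of the string helpers relocated to the new pyam.str module; the expected values below mirror the new tests/test_string.py in this patch.

from pyam.str import find_depth, get_variable_components, reduce_hierarchy

# depth is the number of pipe characters in a variable name
assert find_depth("foo|bar|baz") == 2
# passing `level` turns the result into a boolean test ("1+" means depth >= 1)
assert find_depth(["foo", "foo|bar", "foo|bar|baz"], level="1+") == [False, True, True]

# select individual components, or join a subset back into one string
assert get_variable_components("foo|bar|baz", 1) == "bar"
assert get_variable_components("foo|bar|baz", [0, 2], join=True) == "foo|baz"

# cut a variable down to a given depth; negative values count from the full depth
assert reduce_hierarchy("foo|bar|baz", 1) == "foo|bar"
assert reduce_hierarchy("foo|bar|baz", -2) == "foo"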
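A sketch of the backwards-compatibility path shown in the pyam/utils.py hunk above: the old helpers remain as thin shims that delegate to the new implementations and emit a deprecation message (the TODO comments slate them for removal in release >= 2.1). This assumes only what the diff shows; the exact warning mechanics live in pyam.logging.deprecation_warning.

from pyam.utils import islistable, isstr  # deprecated shims, still importable

# each call emits a deprecation message pointing to the replacement,
# e.g. "Please use `pyam.str.is_str()`.", and then delegates to it
assert isstr("foo")  # delegates to pyam.str.is_str()
assert islistable(["foo", "bar"])  # delegates to pandas.api.types.is_list_like()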