diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 13fb4cd4e..f03abd2d7 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,7 +1,8 @@ # Next Release -- [#796](https://github.com/IAMconsortium/pyam/pull/796] Raise explicit error message if no connection to IIASA manager service -- [#794](https://github.com/IAMconsortium/pyam/pull/794] Fixed wrong color codes for AR6 Illustrative Pathways +- [#801](https://github.com/IAMconsortium/pyam/pull/801) Support initializing with `meta` dataframe in long format +- [#796](https://github.com/IAMconsortium/pyam/pull/796) Raise explicit error message if no connection to IIASA manager service +- [#794](https://github.com/IAMconsortium/pyam/pull/794) Fixed wrong color codes for AR6 Illustrative Pathways # Release v2.0.0 diff --git a/pyam/core.py b/pyam/core.py index a880aa38e..f219e5380 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -84,12 +84,13 @@ class IamDataFrame(object): Parameters ---------- - data : :class:`pandas.DataFrame` or file-like object as str or :class:`pathlib.Path` + data : :class:`pandas.DataFrame`, :class:`pathlib.Path` or file-like object Scenario timeseries data following the IAMC data format or a supported variation as pandas object or a path to a file. meta : :class:`pandas.DataFrame`, optional - A dataframe with suitable 'meta' indicators for the new instance. - The index will be downselected to scenarios present in `data`. + A dataframe with suitable 'meta' indicators in wide (indicator as column name) + or long (key/value columns) format. + The dataframe will be downselected to scenarios present in `data`. index : list, optional Columns to use for resulting IamDataFrame index. kwargs @@ -147,10 +148,16 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): # if meta is given explicitly, verify that index and column names are valid if meta is not None: + if meta.index.names == [None]: + meta.set_index(index, inplace=True) if not meta.index.names == index: raise ValueError( f"Incompatible `index={index}` with `meta.index={meta.index.names}`" ) + # if meta is in "long" format as key-value columns, cast to wide format + if len(meta.columns) == 2 and all(meta.columns == ["key", "value"]): + meta = meta.pivot(values="value", columns="key") + meta.columns.name = None # try casting to Path if file-like is string or LocalPath or pytest.LocalPath try: diff --git a/tests/conftest.py b/tests/conftest.py index 7e358127f..1ed6e9da0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -57,7 +57,7 @@ ["model_a", "scen_b", 2, np.nan], ], columns=META_IDX + META_COLS, -).set_index(META_IDX) +) FULL_FEATURE_DF = pd.DataFrame( diff --git a/tests/test_core.py b/tests/test_core.py index bec7093c5..efd11e5ca 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -33,13 +33,14 @@ columns=["model", "scenario", "region", 2010, 2020], ).set_index(["model", "region"]) + META_DF = pd.DataFrame( [ - ["model_a", "scen_a", 1], - ["model_a", "scen_b", np.nan], - ["model_a", "scen_c", 2], + ["model_a", "scen_a", 1, "foo"], + ["model_a", "scen_b", np.nan, "bar"], + ["model_a", "scen_c", 2, "baz"], ], - columns=META_IDX + ["foo"], + columns=META_IDX + ["number", "string"], ).set_index(META_IDX) @@ -157,17 +158,45 @@ def test_init_df_with_extra_col(test_pd_df): pd.testing.assert_frame_equal(obs, exp) -def test_init_df_with_meta(test_pd_df): - # pass explicit meta dataframe with a scenario that doesn't exist in data - df = IamDataFrame(test_pd_df, meta=META_DF[["foo"]]) +def test_init_df_with_meta_with_index(test_pd_df): + # pass indexed meta dataframe with a scenario that doesn't exist in data + df = IamDataFrame(test_pd_df, meta=META_DF) + + # check that scenario not existing in data is removed during initialization + pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]]) + assert df.scenario == ["scen_a", "scen_b"] + + +def test_init_df_with_meta_no_index(test_pd_df): + # pass meta without index with a scenario that doesn't exist in data + df = IamDataFrame(test_pd_df, meta=META_DF.reset_index()) # check that scenario not existing in data is removed during initialization pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]]) assert df.scenario == ["scen_a", "scen_b"] +def test_init_df_with_meta_key_value(test_pd_df): + # pass meta with key-value columns with a scenario that doesn't exist in data + + meta_df = pd.DataFrame( + [ + ["model_a", "scen_a", "number", 1], + ["model_a", "scen_a", "string", "foo"], + ["model_a", "scen_b", "string", "bar"], + ["model_a", "scen_c", "number", 2], + ], + columns=META_IDX + ["key", "value"], + ) + df = IamDataFrame(test_pd_df, meta=meta_df) + + # check that scenario not existing in data is removed during initialization + pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]], check_dtype=False) + assert df.scenario == ["scen_a", "scen_b"] + + def test_init_df_with_meta_exclude_raises(test_pd_df): - # pass explicit meta dataframe with a scenario that + # pass explicit meta dataframe with a legacy "exclude" column meta = META_DF.copy() meta["exclude"] = False with pytest.raises(ValueError, match="Illegal columns in `meta`: 'exclude'"):