diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index 33cd0418..7d683704 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -31,6 +31,10 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest + - name: Test successful parsing of yaml files run: | - pytest tests + pytest nomenclature/tests/_test_parse_yaml.py + - name: Install and test package functions + run: | + pip install --editable . + pytest nomenclature/tests diff --git a/README.md b/README.md index 3d35eda8..49136e1d 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,14 @@ industry or the building stock. In the data format, every timeseries is described by six dimensions (codes): -1. Model - [more information](model) -2. Scenario - [more information](scenario) -3. Region - [more information](region) -4. Variable - [more information](variable) -5. Unit - see the section on [variables](variable) for details +1. Model - [more information](nomenclature/definitions/model) +2. Scenario - [more information](nomenclature/definitions/scenario) +3. Region - [more information](nomenclature/definitions/region) +4. Variable - [more information](nomenclature/definitions/variable) +5. Unit - see the section on [variables](nomenclature/definitions/variable) + for details 6. Subannual (optional, default 'Year')[1] - - [more information](subannual) + [more information](nomenclature/definitions/subannual) In addition to these six dimensions, every timeseries is described by a set of **year-value** pairs. 
diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py
new file mode 100644
index 00000000..121bea2e
--- /dev/null
+++ b/nomenclature/__init__.py
@@ -0,0 +1,140 @@
+from pathlib import Path
+import logging
+import yaml
+from pyam import IamDataFrame
+
+# set up logging formatting
+logger = logging.getLogger(__name__)
+stderr_info_handler = logging.StreamHandler()
+formatter = logging.Formatter('%(name)s - %(levelname)s: %(message)s')
+stderr_info_handler.setFormatter(formatter)
+logger.addHandler(stderr_info_handler)
+
+
+# path to nomenclature definitions
+DEF_PATH = Path(__file__).parent / 'definitions'
+
+
+def _parse_yaml(path, file='**/*', ext='.yaml'):
+    """Parse `file` in `path` (or all files in subfolders if `file='**/*'`)
+
+    Parameters
+    ----------
+    path : pathlib.Path
+        Folder in which to look for definition files
+    file : str, optional
+        Glob pattern (without extension) of the files to parse
+    ext : str, optional
+        File extension appended to `file`
+
+    Returns
+    -------
+    dict
+        Merged top-level entries of all parsed files; each entry has an
+        additional `file` attribute with the path of its source file
+    """
+    dct = {}
+    # sort the files so that the merged dictionary does not depend on the
+    # (arbitrary) order in which the file system lists them
+    for f in sorted(path.glob(f'{file}{ext}')):
+        # decode as utf-8 explicitly instead of using the platform default
+        with open(f, 'r', encoding='utf-8') as stream:
+            _dct = yaml.safe_load(stream)
+        # a file without any entries is parsed as `None`, nothing to add
+        if not _dct:
+            continue
+        # add `file` attribute to each element in the dictionary
+        for value in _dct.values():
+            value['file'] = str(f)
+        dct.update(_dct)
+    return dct
+
+
+variables = _parse_yaml(DEF_PATH / 'variable')
+"""Dictionary of variables"""
+
+
+regions = _parse_yaml(DEF_PATH / 'region')
+"""Dictionary of all regions"""
+
+
+countries = _parse_yaml(DEF_PATH / 'region', 'countries')
+"""Dictionary of countries"""
+
+
+iso_mapping = dict(
+    [(countries[c]['iso3'], c) for c in countries]
+    + [(countries[c]['iso2'], c) for c in countries]
+    # add alternative iso2 codes used by the European Commission to the mapping
+    + [(countries[c]['iso2_alt'], c) for c in countries
+       if 'iso2_alt' in countries[c]]
+)
+"""Dictionary of iso2/iso3/alternative-iso2 codes to country names"""
+
+
+def _add_to(mapping, key, value):
+    """Add key-value to mapping and return the item stored under `key`
+
+    An existing item is kept; only a list `value` is appended to it.
+    """
+    if key not in mapping:
+        mapping[key] = value
+    elif isinstance(value, list):
+        mapping[key] += value
+    return mapping[key]
+
+
+def _create_nuts3_hierarchy():
+    """Parse nuts3.yaml and create hierarchical dictionary"""
+    hierarchy = dict()
+    keys = ['country', 'nuts1', 'nuts2']
+    for n3, mapping in _parse_yaml(DEF_PATH / 'region', 'nuts3').items():
+        country, n1, n2 = [mapping.get(i) for i in keys]
+        country_dict = _add_to(hierarchy, country, {n1: dict()})
+        n1_dict = _add_to(country_dict, n1, {n2: list()})
+        _add_to(n1_dict, n2, [n3])
+    return hierarchy
+
+
+nuts_hierarchy = _create_nuts3_hierarchy()
+"""Hierarchical dictionary of nuts region classification"""
+
+
+subannual = _parse_yaml(DEF_PATH / 'subannual')
+"""Dictionary of subannual timeslices"""
+
+
+def validate(df):
+    """Validate that all columns of a dataframe follow the nomenclature
+
+    Parameters
+    ----------
+    df : path to file, pandas.DataFrame, pyam.IamDataFrame (or castable object)
+        A timeseries dataframe following the common data format
+
+    Returns
+    -------
+    bool
+        Return `True` if all column entries in `df` are valid
+        or `False` otherwise
+    """
+    df = IamDataFrame(df)
+    success = True
+
+    # set up list of dimension (columns) to validate
+    cols = [
+        ('region', regions, 's'),
+        ('variable', variables, 's')
+    ]
+    if 'subannual' in df.data.columns:
+        cols.append(('subannual', subannual, ' timeslices'))
+
+    # iterate over dimensions and perform validation
+    msg = 'The following {} are not defined in the nomenclature:\n {}'
+    for col, codelist, ext in cols:
+        invalid = [c for c in df.data[col].unique() if c not in codelist]
+        if invalid:
+            success = False
+            logger.warning(msg.format(col + ext, invalid))
+
+    return success
diff --git a/model/README.md b/nomenclature/definitions/model/README.md
similarity index 100%
rename from model/README.md
rename to nomenclature/definitions/model/README.md
diff --git a/region/README.md b/nomenclature/definitions/region/README.md
similarity index 75%
rename from region/README.md
rename to nomenclature/definitions/region/README.md
index e01e7510..524244c7 100644
--- a/region/README.md
+++ b/nomenclature/definitions/region/README.md
@@ -50,23 +50,19 @@ in the [data](data) folder.
#### Example for using this codelist The code snippet (Python) below shows how to obtain the list of countries -and a mapping of ISO2-codes (including alternatives) -to the common country names. +and a mapping of ISO2/3-codes (including alternatives) +to the common country names using the installable Python package. ```python -# load countries codelist from file -import yaml -with open('countries.yaml', 'r') as stream: - country_codelist = yaml.load(stream, Loader=yaml.FullLoader) - -# translate codelist to list and mapping (dictionary) -list_of_countries = list(country_codelist) -iso2_mapping = dict( - [(country_codelist[c]['iso2'], c) for c in country_codelist] - # add alternative iso2 codes used by the European Commission to the mapping - + [(country_codelist[c]['iso2_alt'], c) for c in country_codelist - if 'iso2_alt' in country_codelist[c]] -) +>>> import nomenclature as nc +>>> list(nc.countries) +['Albania', 'Andorra', 'Austria', ..., 'United Kingdom'] +>>> nc.iso_mapping['GR'] +'Greece' +>>> nc.iso_mapping['GRC'] +'Greece' +>>> nc.iso_mapping['EL'] +'Greece' ``` ### Sub-country areas following the 'Nomenclature of Territorial Units for Statistics' (NUTS) @@ -90,41 +86,28 @@ website (last download March 27, 2020, per [@erikfilias](https://github.com/erik #### Example for using this codelist The code snippet (Python) below shows how to obtain a recursive dictionary -along the NUTS classification from the NUTS-3 codelist, i.e., +along the NUTS classification, i.e., ``` -hierarchy = { +nuts_hierarchy = { : { : { - : [], + : [], ... }, ... }, ... }, } ``` +The package also includes a `regions` dictionary with the names +of all NUTS areas. 
+ ```python -# load NUTS-3 codelist from file -import yaml -with open(f'nuts3.yaml', 'r') as stream: - nuts3_codelist = yaml.load(stream, Loader=yaml.FullLoader) - -# auxiliary function to add key-value to object and return -def add_to(mapping, key, value): - if key not in mapping: - mapping[key] = value - elif isinstance(value, list): - mapping[key] += value - return mapping[key] - -hierarchy = dict() - -# iterate over NUTS-3 codelist and recursively add items to the hierarchy dict -for n3, mapping in nuts3_codelist.items(): - country, n1, n2 = mapping['country'], mapping['nuts1'], mapping['nuts2'] - country_dict = add_to(hierarchy, country, {n1: dict()}) - n1_dict = add_to(country_dict, n1, {n2: list()}) - add_to(n1_dict, n2, [n3]) +>>> import nomenclature as nc +>>> nc.nuts_hierarchy['Belgium']['BE2']['BE24'] +['BE241', 'BE242'] +>>> nc.regions['BE241']['name'] +'Arr. Halle-Vilvoorde' ``` ### Other sub-country area classification diff --git a/region/aggregate-regions.yaml b/nomenclature/definitions/region/aggregate-regions.yaml similarity index 100% rename from region/aggregate-regions.yaml rename to nomenclature/definitions/region/aggregate-regions.yaml diff --git a/region/countries.yaml b/nomenclature/definitions/region/countries.yaml similarity index 100% rename from region/countries.yaml rename to nomenclature/definitions/region/countries.yaml diff --git a/region/data/countries.csv b/nomenclature/definitions/region/data/countries.csv similarity index 100% rename from region/data/countries.csv rename to nomenclature/definitions/region/data/countries.csv diff --git a/region/data/write-countries.py b/nomenclature/definitions/region/data/write-countries.py similarity index 100% rename from region/data/write-countries.py rename to nomenclature/definitions/region/data/write-countries.py diff --git a/region/data/write-nuts.py b/nomenclature/definitions/region/data/write-nuts.py similarity index 100% rename from region/data/write-nuts.py rename to 
nomenclature/definitions/region/data/write-nuts.py diff --git a/region/nuts1.yaml b/nomenclature/definitions/region/nuts1.yaml similarity index 100% rename from region/nuts1.yaml rename to nomenclature/definitions/region/nuts1.yaml diff --git a/region/nuts2.yaml b/nomenclature/definitions/region/nuts2.yaml similarity index 100% rename from region/nuts2.yaml rename to nomenclature/definitions/region/nuts2.yaml diff --git a/region/nuts3.yaml b/nomenclature/definitions/region/nuts3.yaml similarity index 100% rename from region/nuts3.yaml rename to nomenclature/definitions/region/nuts3.yaml diff --git a/region/subcountries.yaml b/nomenclature/definitions/region/subcountries.yaml similarity index 100% rename from region/subcountries.yaml rename to nomenclature/definitions/region/subcountries.yaml diff --git a/scenario/README.md b/nomenclature/definitions/scenario/README.md similarity index 100% rename from scenario/README.md rename to nomenclature/definitions/scenario/README.md diff --git a/subannual/README.md b/nomenclature/definitions/subannual/README.md similarity index 90% rename from subannual/README.md rename to nomenclature/definitions/subannual/README.md index 091d4c39..a669242c 100644 --- a/subannual/README.md +++ b/nomenclature/definitions/subannual/README.md @@ -21,6 +21,11 @@ representative periods (e.g., "summer-day"). Each item in the codelists below includes an attribute `duration` indicating the duration relative to a normal year (i.e., not a leap year). +### Yearly data + +The default entry for the openENTRANCE data format in the "subannual" column +is "Year". Its `duration` attribute is set to `1`. + ### Months See [months.yaml](months.yaml) for the codelist. 
diff --git a/subannual/months.yaml b/nomenclature/definitions/subannual/months.yaml similarity index 100% rename from subannual/months.yaml rename to nomenclature/definitions/subannual/months.yaml diff --git a/nomenclature/definitions/subannual/year.yaml b/nomenclature/definitions/subannual/year.yaml new file mode 100644 index 00000000..4e6a28fb --- /dev/null +++ b/nomenclature/definitions/subannual/year.yaml @@ -0,0 +1,4 @@ +# Default entry for the subannual column + +Year: + duration: 1 \ No newline at end of file diff --git a/variable/README.md b/nomenclature/definitions/variable/README.md similarity index 100% rename from variable/README.md rename to nomenclature/definitions/variable/README.md diff --git a/variable/data/variables_iamc15.csv b/nomenclature/definitions/variable/data/variables_iamc15.csv similarity index 100% rename from variable/data/variables_iamc15.csv rename to nomenclature/definitions/variable/data/variables_iamc15.csv diff --git a/variable/data/write-iamc15c-variables.py b/nomenclature/definitions/variable/data/write-iamc15c-variables.py similarity index 100% rename from variable/data/write-iamc15c-variables.py rename to nomenclature/definitions/variable/data/write-iamc15c-variables.py diff --git a/variable/economy/README.md b/nomenclature/definitions/variable/economy/README.md similarity index 100% rename from variable/economy/README.md rename to nomenclature/definitions/variable/economy/README.md diff --git a/variable/economy/economy.yaml b/nomenclature/definitions/variable/economy/economy.yaml similarity index 100% rename from variable/economy/economy.yaml rename to nomenclature/definitions/variable/economy/economy.yaml diff --git a/variable/emissions/README.md b/nomenclature/definitions/variable/emissions/README.md similarity index 100% rename from variable/emissions/README.md rename to nomenclature/definitions/variable/emissions/README.md diff --git a/variable/emissions/emissions.yaml 
b/nomenclature/definitions/variable/emissions/emissions.yaml similarity index 100% rename from variable/emissions/emissions.yaml rename to nomenclature/definitions/variable/emissions/emissions.yaml diff --git a/variable/energy/README.md b/nomenclature/definitions/variable/energy/README.md similarity index 100% rename from variable/energy/README.md rename to nomenclature/definitions/variable/energy/README.md diff --git a/variable/energy/energy-final.yaml b/nomenclature/definitions/variable/energy/energy-final.yaml similarity index 100% rename from variable/energy/energy-final.yaml rename to nomenclature/definitions/variable/energy/energy-final.yaml diff --git a/variable/energy/energy-primary.yaml b/nomenclature/definitions/variable/energy/energy-primary.yaml similarity index 100% rename from variable/energy/energy-primary.yaml rename to nomenclature/definitions/variable/energy/energy-primary.yaml diff --git a/variable/energy/energy-secondary.yaml b/nomenclature/definitions/variable/energy/energy-secondary.yaml similarity index 100% rename from variable/energy/energy-secondary.yaml rename to nomenclature/definitions/variable/energy/energy-secondary.yaml diff --git a/variable/technology/README.md b/nomenclature/definitions/variable/technology/README.md similarity index 100% rename from variable/technology/README.md rename to nomenclature/definitions/variable/technology/README.md diff --git a/variable/technology/electricity-grid.yaml b/nomenclature/definitions/variable/technology/electricity-grid.yaml similarity index 100% rename from variable/technology/electricity-grid.yaml rename to nomenclature/definitions/variable/technology/electricity-grid.yaml diff --git a/variable/technology/power-plant.yaml b/nomenclature/definitions/variable/technology/power-plant.yaml similarity index 100% rename from variable/technology/power-plant.yaml rename to nomenclature/definitions/variable/technology/power-plant.yaml diff --git a/variable/technology/technologies.yaml 
b/nomenclature/definitions/variable/technology/technologies.yaml
similarity index 100%
rename from variable/technology/technologies.yaml
rename to nomenclature/definitions/variable/technology/technologies.yaml
diff --git a/tests/test_init.py b/nomenclature/tests/_test_parse_yaml.py
similarity index 96%
rename from tests/test_init.py
rename to nomenclature/tests/_test_parse_yaml.py
index c7a6fa3b..e798fb53 100644
--- a/tests/test_init.py
+++ b/nomenclature/tests/_test_parse_yaml.py
@@ -16,4 +16,4 @@ def test_parse_yaml_files():
             print(f"Error parsing file `{file}`\n{e}\n")
 
     # tests fails if any file cannot be parsed, show list of these files
-    assert not lst
\ No newline at end of file
+    assert not lst
diff --git a/nomenclature/tests/test_core.py b/nomenclature/tests/test_core.py
new file mode 100644
index 00000000..4ebb6872
--- /dev/null
+++ b/nomenclature/tests/test_core.py
@@ -0,0 +1,22 @@
+import nomenclature as nc
+
+
+def test_variables():
+    # check that variables dictionary is not empty and has specific element
+    assert 'Emissions|CO2' in nc.variables
+
+
+def test_regions():
+    # check that regions dictionary is not empty and has specific element
+    assert 'Europe' in nc.regions
+
+
+def test_iso_mapping():
+    # check that iso-mapping dictionary is not empty and has specific elements
+    for name in ['GR', 'GRC', 'EL']:
+        assert nc.iso_mapping[name] == 'Greece'
+
+
+def test_nuts_hierarchy():
+    # check that nuts-hierarchy is not empty and has specific elements
+    assert nc.nuts_hierarchy['Belgium']['BE2']['BE24'] == ['BE241', 'BE242']
diff --git a/requirements.txt b/requirements.txt
index c3726e8b..927e9032 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 pyyaml
+pyam-iamc # the pyam package is released on pypi under this name
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..cd972262
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,27 @@
+[metadata]
+name = nomenclature
+author = openENTRANCE consortium
+author_email = huppmann@iiasa.ac.at
+license = Apache License 2.0
+description = Model linkage nomenclature for the openENTRANCE project
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/openENTRANCE/nomenclature
+
+[options]
+packages = nomenclature
+include_package_data = True
+# pyam (imported by the package) is released on pypi as pyam-iamc
+install_requires =
+    setuptools >= 41
+    pyyaml
+    pyam-iamc
+setup_requires =
+    setuptools >= 41
+    setuptools_scm
+
+[options.package_data]
+# yaml codelists one and two folder levels below `definitions`
+nomenclature =
+    definitions/*/*.yaml
+    definitions/*/*/*.yaml
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..d5d43d7c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,3 @@
+from setuptools import setup
+
+setup(use_scm_version=True)