Refactor the nomenclature into an installable package (#38)

openENTRANCE · May 29, 2020 · 539c26d · 539c26d
1 parent d19a996
commit 539c26d
Show file tree

Hide file tree

Showing 38 changed files with 209 additions and 48 deletions.
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -31,6 +31,10 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
+    - name: Test successful parsing of yaml files
       run: |
-        pytest tests
+        pytest nomenclature/tests/_test_parse_yaml.py
+    - name: Install and test package functions
+      run: |
+        pip install --editable .
+        pytest nomenclature/tests
diff --git a/README.md b/README.md
@@ -46,13 +46,14 @@ industry or the building stock.
 
 In the data format, every timeseries is described by six dimensions (codes):
 
-1.	Model - [more information](model)
-2.	Scenario - [more information](scenario)
-3.	Region - [more information](region)
-4.	Variable - [more information](variable)
-5.	Unit - see the section on [variables](variable) for details
+1.	Model - [more information](nomenclature/definitions/model)
+2.	Scenario - [more information](nomenclature/definitions/scenario)
+3.	Region - [more information](nomenclature/definitions/region)
+4.	Variable - [more information](nomenclature/definitions/variable)
+5.	Unit - see the section on [variables](nomenclature/definitions/variable)
+    for details
 6.	Subannual (optional, default 'Year')<sup>[1]</sup> -
-    [more information](subannual)
+    [more information](nomenclature/definitions/subannual)
 
 In addition to these six dimensions, every timeseries is described by
 a set of **year-value** pairs.

diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py
@@ -0,0 +1,115 @@
+from pathlib import Path
+import logging
+import yaml
+from pyam import IamDataFrame
+
+# package logger: records are emitted as '<name> - <level>: <message>'
+# through a dedicated stream handler
+formatter = logging.Formatter('%(name)s - %(levelname)s: %(message)s')
+stderr_info_handler = logging.StreamHandler()
+stderr_info_handler.setFormatter(formatter)
+logger = logging.getLogger(__name__)
+logger.addHandler(stderr_info_handler)
+
+
+# folder with the nomenclature definitions, located next to this module
+DEF_PATH = Path(__file__).parent.joinpath('definitions')
+
+
+def _parse_yaml(path, file='**/*', ext='.yaml'):
+    """Parse `file` in `path` (or all files in subfolders if `file='**/*'`)
+
+    Parameters
+    ----------
+    path : pathlib.Path
+        Folder in which to look for definition files
+    file : str, optional
+        Glob pattern (without extension) of the files to parse
+    ext : str, optional
+        File extension appended to `file`
+
+    Returns
+    -------
+    dict
+        Merged top-level entries of all parsed files; each entry gets an
+        additional `file` attribute holding the path of its source file.
+        If a key is defined in several files, the file parsed last wins.
+    """
+    dct = {}
+    # sort the matches so the merge order is independent of the file system
+    for f in sorted(path.glob(f'{file}{ext}')):
+        # read as utf-8 irrespective of the platform default encoding
+        with open(f, 'r', encoding='utf-8') as stream:
+            # an empty (or comment-only) file is parsed as `None`
+            _dct = yaml.safe_load(stream) or {}
+        # add `file` attribute to each element in the dictionary
+        for value in _dct.values():
+            value['file'] = str(f)
+        dct.update(_dct)
+    return dct
+
+
+variables = _parse_yaml(DEF_PATH / 'variable')
+"""Dictionary of variables"""
+
+
+regions = _parse_yaml(DEF_PATH / 'region')
+"""Dictionary of all regions"""
+
+
+countries = _parse_yaml(DEF_PATH / 'region', file='countries')
+"""Dictionary of countries"""
+
+
+iso_mapping = {
+    # later groups take precedence if a code appears more than once
+    **{attrs['iso3']: name for name, attrs in countries.items()},
+    **{attrs['iso2']: name for name, attrs in countries.items()},
+    # alternative iso2 codes used by the European Commission
+    **{attrs['iso2_alt']: name for name, attrs in countries.items()
+       if 'iso2_alt' in attrs},
+}
+"""Dictionary of iso2/iso3/alternative-iso2 codes to country names"""
+
+
+def _add_to(mapping, key, value):
+    """Insert `value` at `key` or extend the existing entry, and return it
+
+    A new `key` is assigned `value` as is. For an existing `key`, a list
+    `value` is added to the stored entry; any other `value` is ignored.
+    The entry stored in `mapping` under `key` is returned in all cases.
+    """
+    if key in mapping:
+        # only list values are merged into an existing entry
+        if isinstance(value, list):
+            mapping[key] += value
+    else:
+        mapping[key] = value
+    return mapping[key]
+
+
+def _create_nuts3_hierarchy():
+    """Build the nested country > nuts1 > nuts2 > [nuts3] dictionary
+
+    The `country`, `nuts1` and `nuts2` attributes of each entry in the
+    nuts3 codelist determine where the nuts3 code is placed.
+    """
+    hierarchy = {}
+    levels = ('country', 'nuts1', 'nuts2')
+    nuts3_codelist = _parse_yaml(DEF_PATH / 'region', 'nuts3')
+    for n3, attrs in nuts3_codelist.items():
+        # a missing attribute yields `None` as key at that level
+        country, n1, n2 = (attrs.get(level) for level in levels)
+        n1_dict = hierarchy.setdefault(country, {}).setdefault(n1, {})
+        n1_dict.setdefault(n2, []).append(n3)
+    return hierarchy
+
+
+# built once, when the package is imported, from the nuts3 codelist
+nuts_hierarchy = _create_nuts3_hierarchy()
+"""Hierarchical dictionary of nuts region classification"""
+
+
+# all yaml files below the `subannual` definitions folder, merged
+subannual = _parse_yaml(DEF_PATH / 'subannual')
+"""Dictionary of subannual timeslices"""
+
+
+def validate(df):
+    """Check the region, variable and subannual codes of a dataframe
+
+    For each validated dimension with entries that are not defined in the
+    nomenclature, a warning listing these entries is logged.
+
+    Parameters
+    ----------
+    df : path to file, pandas.DataFrame, pyam.IamDataFrame (or castable object)
+        A timeseries dataframe following the common data format
+
+    Returns
+    -------
+    bool
+        `True` if all entries of the validated columns are defined
+        in the nomenclature, `False` otherwise
+    """
+    data = IamDataFrame(df).data
+
+    # dimensions (columns) to validate: name, codelist, suffix for the message
+    dimensions = [('region', regions, 's'), ('variable', variables, 's')]
+    # the subannual column is only validated if present in the data
+    if 'subannual' in data.columns:
+        dimensions += [('subannual', subannual, ' timeslices')]
+
+    template = 'The following {} are not defined in the nomenclature:\n    {}'
+    is_valid = True
+    for col, codelist, ext in dimensions:
+        unknown = [c for c in data[col].unique() if c not in codelist]
+        if not unknown:
+            continue
+        is_valid = False
+        logger.warning(template.format(col + ext, unknown))
+
+    return is_valid
diff --git a/model/README.md → nomenclature/definitions/model/README.md b/model/README.md → nomenclature/definitions/model/README.md
diff --git a/region/README.md → nomenclature/definitions/region/README.md b/region/README.md → nomenclature/definitions/region/README.md
@@ -50,23 +50,19 @@ in the [data](data) folder.
 #### Example for using this codelist
 
 The code snippet (Python) below shows how to obtain the list of countries
-and a mapping of ISO2-codes (including alternatives)
-to the common country names.
+and a mapping of ISO2/3-codes (including alternatives)
+to the common country names using the installable Python package.
 
 ```python
-# load countries codelist from file
-import yaml
-with open('countries.yaml', 'r') as stream:
-    country_codelist = yaml.load(stream, Loader=yaml.FullLoader)
-
-# translate codelist to list and mapping (dictionary)
-list_of_countries = list(country_codelist)
-iso2_mapping = dict(
-    [(country_codelist[c]['iso2'], c) for c in country_codelist]
-    # add alternative iso2 codes used by the European Commission to the mapping
-    + [(country_codelist[c]['iso2_alt'], c) for c in country_codelist
-       if 'iso2_alt' in country_codelist[c]]
-)
+>>> import nomenclature as nc
+>>> list(nc.countries)
+['Albania', 'Andorra', 'Austria', ..., 'United Kingdom']
+>>> nc.iso_mapping['GR']
+'Greece'
+>>> nc.iso_mapping['GRC']
+'Greece'
+>>> nc.iso_mapping['EL']
+'Greece'
 ```
 
 ### Sub-country areas following the 'Nomenclature of Territorial Units for Statistics' (NUTS)
@@ -90,41 +86,28 @@ website (last download March 27, 2020, per [@erikfilias](https://github.com/erik
 #### Example for using this codelist
 
 The code snippet (Python) below shows how to obtain a recursive dictionary
-along the NUTS classification from the NUTS-3 codelist, i.e.,
+along the NUTS classification, i.e.,
 
 ```
-hierarchy = {
+nuts_hierarchy = {
     <country>: {
         <nuts1>: {
-            <nuts2>: [<list of nuts3>],
+            <nuts2>: [<list of nuts3 areas>],
             ... },
         ... },
    ... },
 }
 ```
 
+The package also includes a `regions` dictionary with the names
+of all NUTS areas.
+
 ```python
-# load NUTS-3 codelist from file
-import yaml
-with open(f'nuts3.yaml', 'r') as stream:
-    nuts3_codelist = yaml.load(stream, Loader=yaml.FullLoader)
-
-# auxiliary function to add key-value to object and return
-def add_to(mapping, key, value):
-    if key not in mapping:
-        mapping[key] = value
-    elif isinstance(value, list):
-        mapping[key] += value
-    return mapping[key]
-
-hierarchy = dict()
-
-# iterate over NUTS-3 codelist and recursively add items to the hierarchy dict
-for n3, mapping in nuts3_codelist.items():
-    country, n1, n2 = mapping['country'], mapping['nuts1'], mapping['nuts2']
-    country_dict = add_to(hierarchy, country, {n1: dict()})
-    n1_dict = add_to(country_dict, n1, {n2: list()})
-    add_to(n1_dict, n2, [n3])
+>>> import nomenclature as nc
+>>> nc.nuts_hierarchy['Belgium']['BE2']['BE24']
+['BE241', 'BE242']
+>>> nc.regions['BE241']['name']
+'Arr. Halle-Vilvoorde'
 ```
 
 ### Other sub-country area classification

diff --git a/region/aggregate-regions.yaml → ...definitions/region/aggregate-regions.yaml b/region/aggregate-regions.yaml → ...definitions/region/aggregate-regions.yaml
diff --git a/region/countries.yaml → ...clature/definitions/region/countries.yaml b/region/countries.yaml → ...clature/definitions/region/countries.yaml
diff --git a/region/data/countries.csv → ...ure/definitions/region/data/countries.csv b/region/data/countries.csv → ...ure/definitions/region/data/countries.csv
diff --git a/region/data/write-countries.py → ...efinitions/region/data/write-countries.py b/region/data/write-countries.py → ...efinitions/region/data/write-countries.py
diff --git a/region/data/write-nuts.py → ...ure/definitions/region/data/write-nuts.py b/region/data/write-nuts.py → ...ure/definitions/region/data/write-nuts.py
diff --git a/region/nuts1.yaml → nomenclature/definitions/region/nuts1.yaml b/region/nuts1.yaml → nomenclature/definitions/region/nuts1.yaml
diff --git a/region/nuts2.yaml → nomenclature/definitions/region/nuts2.yaml b/region/nuts2.yaml → nomenclature/definitions/region/nuts2.yaml
diff --git a/region/nuts3.yaml → nomenclature/definitions/region/nuts3.yaml b/region/nuts3.yaml → nomenclature/definitions/region/nuts3.yaml
diff --git a/region/subcountries.yaml → ...ture/definitions/region/subcountries.yaml b/region/subcountries.yaml → ...ture/definitions/region/subcountries.yaml
diff --git a/scenario/README.md → nomenclature/definitions/scenario/README.md b/scenario/README.md → nomenclature/definitions/scenario/README.md
diff --git a/subannual/README.md → nomenclature/definitions/subannual/README.md b/subannual/README.md → nomenclature/definitions/subannual/README.md
@@ -21,6 +21,11 @@ representative periods (e.g., "summer-day").
 Each item in the codelists below includes an attribute `duration` indicating
 the duration relative to a normal year (i.e., not a leap year).
 
+### Yearly data
+
+The default entry for the openENTRANCE data format in the "subannual" column
+is "Year". Its `duration` attribute is set to `1`.
+
 ### Months
 
 See [months.yaml](months.yaml) for the codelist.

diff --git a/subannual/months.yaml → ...clature/definitions/subannual/months.yaml b/subannual/months.yaml → ...clature/definitions/subannual/months.yaml
diff --git a/nomenclature/definitions/subannual/year.yaml b/nomenclature/definitions/subannual/year.yaml
@@ -0,0 +1,4 @@
+# Default entry for the subannual column
+
+Year:
+   duration: 1
diff --git a/variable/README.md → nomenclature/definitions/variable/README.md b/variable/README.md → nomenclature/definitions/variable/README.md
diff --git a/variable/data/variables_iamc15.csv → ...itions/variable/data/variables_iamc15.csv b/variable/data/variables_iamc15.csv → ...itions/variable/data/variables_iamc15.csv
diff --git a/variable/data/write-iamc15c-variables.py → .../variable/data/write-iamc15c-variables.py b/variable/data/write-iamc15c-variables.py → .../variable/data/write-iamc15c-variables.py
diff --git a/variable/economy/README.md → ...re/definitions/variable/economy/README.md b/variable/economy/README.md → ...re/definitions/variable/economy/README.md
diff --git a/variable/economy/economy.yaml → ...definitions/variable/economy/economy.yaml b/variable/economy/economy.yaml → ...definitions/variable/economy/economy.yaml
diff --git a/variable/emissions/README.md → .../definitions/variable/emissions/README.md b/variable/emissions/README.md → .../definitions/variable/emissions/README.md
diff --git a/variable/emissions/emissions.yaml → ...nitions/variable/emissions/emissions.yaml b/variable/emissions/emissions.yaml → ...nitions/variable/emissions/emissions.yaml
diff --git a/variable/energy/README.md → ...ure/definitions/variable/energy/README.md b/variable/energy/README.md → ...ure/definitions/variable/energy/README.md
diff --git a/variable/energy/energy-final.yaml → ...nitions/variable/energy/energy-final.yaml b/variable/energy/energy-final.yaml → ...nitions/variable/energy/energy-final.yaml
diff --git a/variable/energy/energy-primary.yaml → ...tions/variable/energy/energy-primary.yaml b/variable/energy/energy-primary.yaml → ...tions/variable/energy/energy-primary.yaml
diff --git a/variable/energy/energy-secondary.yaml → ...ons/variable/energy/energy-secondary.yaml b/variable/energy/energy-secondary.yaml → ...ons/variable/energy/energy-secondary.yaml
diff --git a/variable/technology/README.md → ...definitions/variable/technology/README.md b/variable/technology/README.md → ...definitions/variable/technology/README.md
diff --git a/variable/technology/electricity-grid.yaml → ...variable/technology/electricity-grid.yaml b/variable/technology/electricity-grid.yaml → ...variable/technology/electricity-grid.yaml
diff --git a/variable/technology/power-plant.yaml → ...ions/variable/technology/power-plant.yaml b/variable/technology/power-plant.yaml → ...ions/variable/technology/power-plant.yaml
diff --git a/variable/technology/technologies.yaml → ...ons/variable/technology/technologies.yaml b/variable/technology/technologies.yaml → ...ons/variable/technology/technologies.yaml
diff --git a/tests/test_init.py → nomenclature/tests/_test_parse_yaml.py b/tests/test_init.py → nomenclature/tests/_test_parse_yaml.py
@@ -16,4 +16,4 @@ def test_parse_yaml_files():
             print(f"Error parsing file `{file}`\n{e}\n")
 
     # tests fails if any file cannot be parsed, show list of these files
-    assert not lst
+    assert not lst
diff --git a/nomenclature/tests/test_core.py b/nomenclature/tests/test_core.py
@@ -0,0 +1,22 @@
+import nomenclature as nc
+
+
+def test_variables():
+    # check that variables dictionary is not empty and has specific element
+    assert 'Emissions|CO2' in nc.variables
+
+
+def test_regions():
+    # check that regions dictionary is not empty and has specific element
+    assert 'Europe' in nc.regions
+
+
+def test_iso_mapping():
+    # check that iso-mapping dictionary is not empty and has specific elements
+    # (iso2, iso3 and the alternative iso2 code all map to the same country)
+    for name in ['GR', 'GRC', 'EL']:
+        assert nc.iso_mapping[name] == 'Greece'
+
+
+def test_nuts_hierarchy():
+    # check that nuts-hierarchy is not empty and has specific elements
+    assert nc.nuts_hierarchy['Belgium']['BE2']['BE24'] == ['BE241', 'BE242']
diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,2 @@
 pyyaml
+pyam-iamc  # the pyam package is released on pypi under this name
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,23 @@
+[metadata]
+name = nomenclature
+author = openENTRANCE consortium
+author_email = [email protected]
+license = Apache License 2.0
+description = Model linkage nomenclature for the openENTRANCE project
+long_description = file: README.md
+# valid content types are `text/markdown`, `text/x-rst` and `text/plain`
+long_description_content_type = text/markdown
+url = https://github.com/openENTRANCE/nomenclature
+
+[options]
+packages = nomenclature
+include_package_data = True
+# `pyam` is imported by `nomenclature/__init__.py`;
+# the package is released on pypi under the name `pyam-iamc`
+install_requires =
+    setuptools >= 41
+    pyyaml
+    pyam-iamc
+setup_requires =
+    setuptools >= 41
+    setuptools_scm
+
+[options.package_data]
+# keys are package names, patterns are relative to the package folder
+nomenclature =
+    definitions/*/*.yaml
+    definitions/*/*/*.yaml
diff --git a/setup.py b/setup.py
@@ -0,0 +1,3 @@
+from setuptools import setup
+
+# the package metadata and options are declared in `setup.cfg`;
+# the version number is determined by `setuptools_scm`
+setup(use_scm_version=True)
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		pyyaml
		pyam-iamc # the pyam package is released on pypi under this name
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from setuptools import setup

		setup(use_scm_version=True)