Skip to content

Commit

Permalink
Refactor the nomenclature into an installable package (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored May 29, 2020
1 parent d19a996 commit 539c26d
Show file tree
Hide file tree
Showing 38 changed files with 209 additions and 48 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
- name: Test successful parsing of yaml files
run: |
pytest tests
pytest nomenclature/tests/_test_parse_yaml.py
- name: Install and test package functions
run: |
pip install --editable .
pytest nomenclature/tests
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,14 @@ industry or the building stock.

In the data format, every timeseries is described by six dimensions (codes):

1. Model - [more information](model)
2. Scenario - [more information](scenario)
3. Region - [more information](region)
4. Variable - [more information](variable)
5. Unit - see the section on [variables](variable) for details
1. Model - [more information](nomenclature/definitions/model)
2. Scenario - [more information](nomenclature/definitions/scenario)
3. Region - [more information](nomenclature/definitions/region)
4. Variable - [more information](nomenclature/definitions/variable)
5. Unit - see the section on [variables](nomenclature/definitions/variable)
for details
6. Subannual (optional, default 'Year')<sup>[1]</sup> -
[more information](subannual)
[more information](nomenclature/definitions/subannual)

In addition to these six dimensions, every timeseries is described by
a set of **year-value** pairs.
Expand Down
115 changes: 115 additions & 0 deletions nomenclature/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from pathlib import Path
import logging
import yaml
from pyam import IamDataFrame

# set up logging formatting
# (every message emitted via this package's logger is prefixed with the logger
# name and the level; a `StreamHandler` created without arguments writes to
# `sys.stderr`)
logger = logging.getLogger(__name__)
stderr_info_handler = logging.StreamHandler()
formatter = logging.Formatter('%(name)s - %(levelname)s: %(message)s')
stderr_info_handler.setFormatter(formatter)
logger.addHandler(stderr_info_handler)


# path to nomenclature definitions
DEF_PATH = Path(__file__).parent / 'definitions'


def _parse_yaml(path, file='**/*', ext='.yaml'):
"""Parse `file` in `path` (or all files in subfolders if `file='**/*'`)"""
dct = {}
for f in path.glob(f'{file}{ext}'):
with open(f, 'r') as stream:
_dct = yaml.safe_load(stream)
# add `file` attribute to each element in the dictionary
for key, value in _dct.items():
value['file'] = str(f)
dct.update(_dct)
return dct


# all yaml files in the `variable` folder and its subfolders (default pattern)
variables = _parse_yaml(DEF_PATH / 'variable')
"""Dictionary of variables"""


# all yaml files in the `region` folder and its subfolders (default pattern),
# i.e., this is a superset of the `countries` dictionary below
regions = _parse_yaml(DEF_PATH / 'region')
"""Dictionary of all regions"""


# only the entries of the file `countries.yaml` in the `region` folder
countries = _parse_yaml(DEF_PATH / 'region', 'countries')
"""Dictionary of countries"""


# later groups take precedence over earlier ones if a code appears twice
iso_mapping = {
    **{attrs['iso3']: name for name, attrs in countries.items()},
    **{attrs['iso2']: name for name, attrs in countries.items()},
    # add alternative iso2 codes used by the European Commission
    **{
        attrs['iso2_alt']: name for name, attrs in countries.items()
        if 'iso2_alt' in attrs
    },
}
"""Dictionary of iso2/iso3/alternative-iso2 codes to country names"""


def _add_to(mapping, key, value):
"""Add key-value to mapping"""
if key not in mapping:
mapping[key] = value
elif isinstance(value, list):
mapping[key] += value
return mapping[key]


def _create_nuts3_hierarchy():
    """Build the nested dictionary country > nuts1 > nuts2 > [nuts3 areas]

    The entries are read from nuts3.yaml in the region definitions folder.
    """
    nuts3_codelist = _parse_yaml(DEF_PATH / 'region', 'nuts3')
    tree = {}
    for nuts3, attrs in nuts3_codelist.items():
        country = attrs.get('country')
        nuts1 = attrs.get('nuts1')
        nuts2 = attrs.get('nuts2')
        # descend level by level, creating missing branches on the way
        by_nuts1 = _add_to(tree, country, {nuts1: {}})
        by_nuts2 = _add_to(by_nuts1, nuts1, {nuts2: []})
        _add_to(by_nuts2, nuts2, [nuts3])
    return tree


# built once when the package is imported
nuts_hierarchy = _create_nuts3_hierarchy()
"""Hierarchical dictionary of nuts region classification"""


# all yaml files in the `subannual` folder and its subfolders
subannual = _parse_yaml(DEF_PATH / 'subannual')
"""Dictionary of subannual timeslices"""


def validate(df):
    """Check the columns of a timeseries dataframe against the nomenclature

    The `region` and `variable` columns are always checked; the `subannual`
    column is checked only if it exists. A warning listing the unknown
    entries is logged for each column that fails the check.

    Parameters
    ----------
    df : path to file, pandas.DataFrame, pyam.IamDataFrame (or castable object)
        A timeseries dataframe following the common data format

    Returns
    -------
    bool
        `True` if every entry of the checked columns is defined
        in the nomenclature, `False` otherwise
    """
    frame = IamDataFrame(df)

    # (column, codelist, suffix to turn the column name into a plural noun)
    checks = [
        ('region', regions, 's'),
        ('variable', variables, 's'),
    ]
    if 'subannual' in frame.data.columns:
        checks += [('subannual', subannual, ' timeslices')]

    template = 'The following {} are not defined in the nomenclature:\n {}'
    is_valid = True
    for dimension, codelist, suffix in checks:
        unknown = [
            code for code in frame.data[dimension].unique()
            if code not in codelist
        ]
        if not unknown:
            continue
        is_valid = False
        logger.warning(template.format(dimension + suffix, unknown))

    return is_valid
File renamed without changes.
61 changes: 22 additions & 39 deletions region/README.md → nomenclature/definitions/region/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,19 @@ in the [data](data) folder.
#### Example for using this codelist

The code snippet (Python) below shows how to obtain the list of countries
and a mapping of ISO2-codes (including alternatives)
to the common country names.
and a mapping of ISO2/3-codes (including alternatives)
to the common country names using the installable Python package.

```python
# load countries codelist from file
import yaml
with open('countries.yaml', 'r') as stream:
country_codelist = yaml.load(stream, Loader=yaml.FullLoader)

# translate codelist to list and mapping (dictionary)
list_of_countries = list(country_codelist)
iso2_mapping = dict(
[(country_codelist[c]['iso2'], c) for c in country_codelist]
# add alternative iso2 codes used by the European Commission to the mapping
+ [(country_codelist[c]['iso2_alt'], c) for c in country_codelist
if 'iso2_alt' in country_codelist[c]]
)
>>> import nomenclature as nc
>>> list(nc.countries)
['Albania', 'Andorra', 'Austria', ..., 'United Kingdom']
>>> nc.iso_mapping['GR']
'Greece'
>>> nc.iso_mapping['GRC']
'Greece'
>>> nc.iso_mapping['EL']
'Greece'
```

### Sub-country areas following the 'Nomenclature of Territorial Units for Statistics' (NUTS)
Expand All @@ -90,41 +86,28 @@ website (last download March 27, 2020, per [@erikfilias](https://github.com/erik
#### Example for using this codelist

The code snippet (Python) below shows how to obtain a recursive dictionary
along the NUTS classification from the NUTS-3 codelist, i.e.,
along the NUTS classification, i.e.,

```
hierarchy = {
nuts_hierarchy = {
<country>: {
<nuts1>: {
<nuts2>: [<list of nuts3>],
<nuts2>: [<list of nuts3 areas>],
... },
... },
... },
}
```

The package also includes a `regions` dictionary with the names
of all NUTS areas.

```python
# load NUTS-3 codelist from file
import yaml
with open(f'nuts3.yaml', 'r') as stream:
nuts3_codelist = yaml.load(stream, Loader=yaml.FullLoader)

# auxiliary function to add key-value to object and return
def add_to(mapping, key, value):
if key not in mapping:
mapping[key] = value
elif isinstance(value, list):
mapping[key] += value
return mapping[key]

hierarchy = dict()

# iterate over NUTS-3 codelist and recursively add items to the hierarchy dict
for n3, mapping in nuts3_codelist.items():
country, n1, n2 = mapping['country'], mapping['nuts1'], mapping['nuts2']
country_dict = add_to(hierarchy, country, {n1: dict()})
n1_dict = add_to(country_dict, n1, {n2: list()})
add_to(n1_dict, n2, [n3])
>>> import nomenclature as nc
>>> nc.nuts_hierarchy['Belgium']['BE2']['BE24']
['BE241', 'BE242']
>>> nc.regions['BE241']['name']
'Arr. Halle-Vilvoorde'
```

### Other sub-country area classification
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ representative periods (e.g., "summer-day").
Each item in the codelists below includes an attribute `duration` indicating
the duration relative to a normal year (i.e., not a leap year).

### Yearly data

The default entry for the openENTRANCE data format in the "subannual" column
is "Year". Its `duration` attribute is set to `1`.

### Months

See [months.yaml](months.yaml) for the codelist.
Expand Down
File renamed without changes.
4 changes: 4 additions & 0 deletions nomenclature/definitions/subannual/year.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Default entry for the subannual column

Year:
duration: 1
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ def test_parse_yaml_files():
print(f"Error parsing file `{file}`\n{e}\n")

# tests fails if any file cannot be parsed, show list of these files
assert not lst
assert not lst
22 changes: 22 additions & 0 deletions nomenclature/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import nomenclature as nc


def test_variables():
    """The variables dictionary is not empty and has a specific element"""
    expected = 'Emissions|CO2'
    assert expected in nc.variables


def test_regions():
    """The regions dictionary is not empty and has a specific element"""
    expected = 'Europe'
    assert expected in nc.regions


def test_iso_mapping():
    """The iso2, iso3 and alternative iso2 codes map to the country name"""
    codes = ['GR', 'GRC', 'EL']
    resolved = [nc.iso_mapping[code] for code in codes]
    assert resolved == ['Greece'] * len(codes)


def test_nuts_hierarchy():
    """The nuts-hierarchy is not empty and has specific elements"""
    belgium = nc.nuts_hierarchy['Belgium']
    nuts3_areas = belgium['BE2']['BE24']
    assert nuts3_areas == ['BE241', 'BE242']
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pyyaml
pyam-iamc # the pyam package is released on pypi under this name
23 changes: 23 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[metadata]
name = nomenclature
author = openENTRANCE consortium
author_email = [email protected]
license = Apache License 2.0
description = Model linkage nomenclature for the openENTRANCE project
long_description = file: README.md
# `text/markdown` is the content type defined for Markdown descriptions
long_description_content_type = text/markdown
url = https://github.com/openENTRANCE/nomenclature

[options]
packages = nomenclature
include_package_data = True
install_requires =
    setuptools >= 41
    pyyaml
    # imported by `nomenclature/__init__.py`; released on pypi under this name
    pyam-iamc
setup_requires =
    setuptools >= 41
    setuptools_scm

[options.package_data]
# keys are package names, patterns are relative to the package folder
# NOTE(review): extend the pattern if definitions are nested more deeply
nomenclature =
    definitions/*/*.yaml
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from setuptools import setup

# the static package metadata is declared in setup.cfg; `use_scm_version`
# delegates the version number to setuptools_scm (see `setup_requires` there)
setup(use_scm_version=True)

0 comments on commit 539c26d

Please sign in to comment.