diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
index 33cd0418..7d683704 100644
--- a/.github/workflows/pythonapp.yml
+++ b/.github/workflows/pythonapp.yml
@@ -31,6 +31,10 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- - name: Test with pytest
+ - name: Test successful parsing of yaml files
run: |
- pytest tests
+ pytest nomenclature/tests/_test_parse_yaml.py
+ - name: Install and test package functions
+ run: |
+ pip install --editable .
+ pytest nomenclature/tests
diff --git a/README.md b/README.md
index 3d35eda8..49136e1d 100644
--- a/README.md
+++ b/README.md
@@ -46,13 +46,14 @@ industry or the building stock.
In the data format, every timeseries is described by six dimensions (codes):
-1. Model - [more information](model)
-2. Scenario - [more information](scenario)
-3. Region - [more information](region)
-4. Variable - [more information](variable)
-5. Unit - see the section on [variables](variable) for details
+1. Model - [more information](nomenclature/definitions/model)
+2. Scenario - [more information](nomenclature/definitions/scenario)
+3. Region - [more information](nomenclature/definitions/region)
+4. Variable - [more information](nomenclature/definitions/variable)
+5. Unit - see the section on [variables](nomenclature/definitions/variable)
+ for details
6. Subannual (optional, default 'Year')[1] -
- [more information](subannual)
+ [more information](nomenclature/definitions/subannual)
In addition to these six dimensions, every timeseries is described by
a set of **year-value** pairs.
diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py
new file mode 100644
index 00000000..121bea2e
--- /dev/null
+++ b/nomenclature/__init__.py
@@ -0,0 +1,115 @@
+from pathlib import Path
+import logging
+import yaml
+from pyam import IamDataFrame
+
+# set up logging formatting
+# NOTE(review): the handler below is attached to the package logger as a
+# side effect of importing the package
+logger = logging.getLogger(__name__)
+# a `StreamHandler` without arguments writes to stderr
+stderr_info_handler = logging.StreamHandler()
+formatter = logging.Formatter('%(name)s - %(levelname)s: %(message)s')
+stderr_info_handler.setFormatter(formatter)
+logger.addHandler(stderr_info_handler)
+
+
+# path to nomenclature definitions
+# (the `definitions` folder located next to this file)
+DEF_PATH = Path(__file__).parent / 'definitions'
+
+
+def _parse_yaml(path, file='**/*', ext='.yaml'):
+    """Parse yaml file(s) in `path` and merge them into one dictionary
+
+    Parameters
+    ----------
+    path : pathlib.Path
+        Folder in which to look for files
+    file : str, optional
+        Name of the file (without extension) or a glob pattern; the default
+        `'**/*'` matches all files in `path` and in its subfolders
+    ext : str, optional
+        File extension appended to `file`
+
+    Returns
+    -------
+    dict
+        Merged top-level entries of all parsed files; each entry gets an
+        additional `file` attribute holding the path of its source file
+    """
+    dct = {}
+    # sort the matches so that the merge order (and hence which entry wins
+    # if a key is defined in several files) does not depend on the filesystem
+    for f in sorted(path.glob(f'{file}{ext}')):
+        # read as utf-8 instead of the platform-dependent default encoding
+        with open(f, 'r', encoding='utf-8') as stream:
+            # an empty (or comment-only) file is parsed as `None`
+            _dct = yaml.safe_load(stream) or {}
+        # add `file` attribute to each element in the dictionary
+        for value in _dct.values():
+            value['file'] = str(f)
+        dct.update(_dct)
+    return dct
+
+
+# merged content of all yaml files in `definitions/variable` and its subfolders
+variables = _parse_yaml(DEF_PATH / 'variable')
+"""Dictionary of variables"""
+
+
+# merged content of all yaml files in `definitions/region` and its subfolders
+regions = _parse_yaml(DEF_PATH / 'region')
+"""Dictionary of all regions"""
+
+
+# content of `definitions/region/countries.yaml` only
+countries = _parse_yaml(DEF_PATH / 'region', 'countries')
+"""Dictionary of countries"""
+
+iso_mapping = {
+    # codes are inserted in the order iso3, iso2, alternative iso2;
+    # a code that occurs more than once maps to the last country inserted
+    **{attrs['iso3']: name for name, attrs in countries.items()},
+    **{attrs['iso2']: name for name, attrs in countries.items()},
+    # add alternative iso2 codes used by the European Commission to the mapping
+    **{attrs['iso2_alt']: name for name, attrs in countries.items()
+       if 'iso2_alt' in attrs},
+}
+"""Dictionary of iso2/iso3/alternative-iso2 codes to country names"""
+
+
+def _add_to(mapping, key, value):
+    """Insert `value` at `key` or extend the existing list, return the entry
+
+    If `key` is new, `value` is stored as is. If `key` already exists,
+    a list `value` is appended in place to the existing entry, while any
+    other `value` is ignored. In all cases, the entry now stored in
+    `mapping` at `key` is returned.
+    """
+    if key in mapping:
+        # only list values are merged into an existing entry
+        if isinstance(value, list):
+            mapping[key] += value
+    else:
+        mapping[key] = value
+    return mapping[key]
+
+
+def _create_nuts3_hierarchy():
+    """Build the nested NUTS dictionary from the entries of nuts3.yaml
+
+    Returns
+    -------
+    dict
+        Nested mapping `{country: {nuts1: {nuts2: [nuts3, ...]}}}`
+    """
+    tree = dict()
+    nuts3_codelist = _parse_yaml(DEF_PATH / 'region', 'nuts3')
+    for nuts3, attrs in nuts3_codelist.items():
+        # an attribute missing from an entry is read as `None`
+        country = attrs.get('country')
+        nuts1 = attrs.get('nuts1')
+        nuts2 = attrs.get('nuts2')
+        # descend level by level, creating entries that do not exist yet
+        by_nuts1 = _add_to(tree, country, {nuts1: dict()})
+        by_nuts2 = _add_to(by_nuts1, nuts1, {nuts2: list()})
+        _add_to(by_nuts2, nuts2, [nuts3])
+    return tree
+
+
+# nested as {country: {nuts1: {nuts2: [nuts3, ...]}}}, built on package import
+nuts_hierarchy = _create_nuts3_hierarchy()
+"""Hierarchical dictionary of nuts region classification"""
+
+
+# merged content of all yaml files in `definitions/subannual` and its subfolders
+subannual = _parse_yaml(DEF_PATH / 'subannual')
+"""Dictionary of subannual timeslices"""
+
+
+def validate(df):
+    """Check the entries of a timeseries dataframe against the nomenclature
+
+    The columns `region` and `variable` are always checked; the column
+    `subannual` is checked if it exists. For each column with entries that
+    are not defined in the respective codelist, a warning listing these
+    entries is written to the package logger.
+
+    Parameters
+    ----------
+    df : path to file, pandas.DataFrame, pyam.IamDataFrame (or castable object)
+        A timeseries dataframe following the common data format
+
+    Returns
+    -------
+    bool
+        `True` if all entries of the checked columns are defined
+        in the nomenclature, `False` otherwise
+    """
+    df = IamDataFrame(df)
+
+    # (column, codelist, name of the dimension used in the warning)
+    checks = [
+        ('region', regions, 'regions'),
+        ('variable', variables, 'variables'),
+    ]
+    if 'subannual' in df.data.columns:
+        checks.append(('subannual', subannual, 'subannual timeslices'))
+
+    template = 'The following {} are not defined in the nomenclature:\n {}'
+    success = True
+    for col, codelist, label in checks:
+        unknown = [c for c in df.data[col].unique() if c not in codelist]
+        if not unknown:
+            continue
+        # keep going after a failure so that all dimensions are reported
+        success = False
+        logger.warning(template.format(label, unknown))
+
+    return success
diff --git a/model/README.md b/nomenclature/definitions/model/README.md
similarity index 100%
rename from model/README.md
rename to nomenclature/definitions/model/README.md
diff --git a/region/README.md b/nomenclature/definitions/region/README.md
similarity index 75%
rename from region/README.md
rename to nomenclature/definitions/region/README.md
index e01e7510..524244c7 100644
--- a/region/README.md
+++ b/nomenclature/definitions/region/README.md
@@ -50,23 +50,19 @@ in the [data](data) folder.
#### Example for using this codelist
The code snippet (Python) below shows how to obtain the list of countries
-and a mapping of ISO2-codes (including alternatives)
-to the common country names.
+and a mapping of ISO2/3-codes (including alternatives)
+to the common country names using the installable Python package.
```python
-# load countries codelist from file
-import yaml
-with open('countries.yaml', 'r') as stream:
- country_codelist = yaml.load(stream, Loader=yaml.FullLoader)
-
-# translate codelist to list and mapping (dictionary)
-list_of_countries = list(country_codelist)
-iso2_mapping = dict(
- [(country_codelist[c]['iso2'], c) for c in country_codelist]
- # add alternative iso2 codes used by the European Commission to the mapping
- + [(country_codelist[c]['iso2_alt'], c) for c in country_codelist
- if 'iso2_alt' in country_codelist[c]]
-)
+>>> import nomenclature as nc
+>>> list(nc.countries)
+['Albania', 'Andorra', 'Austria', ..., 'United Kingdom']
+>>> nc.iso_mapping['GR']
+'Greece'
+>>> nc.iso_mapping['GRC']
+'Greece'
+>>> nc.iso_mapping['EL']
+'Greece'
```
### Sub-country areas following the 'Nomenclature of Territorial Units for Statistics' (NUTS)
@@ -90,41 +86,28 @@ website (last download March 27, 2020, per [@erikfilias](https://github.com/erik
#### Example for using this codelist
The code snippet (Python) below shows how to obtain a recursive dictionary
-along the NUTS classification from the NUTS-3 codelist, i.e.,
+along the NUTS classification, i.e.,
```
-hierarchy = {
+nuts_hierarchy = {
<country>: {
<nuts1>: {
- <nuts2>: [],
+ <nuts2>: [],
... },
... },
... },
}
```
+The package also includes a `regions` dictionary with the names
+of all NUTS areas.
+
```python
-# load NUTS-3 codelist from file
-import yaml
-with open(f'nuts3.yaml', 'r') as stream:
- nuts3_codelist = yaml.load(stream, Loader=yaml.FullLoader)
-
-# auxiliary function to add key-value to object and return
-def add_to(mapping, key, value):
- if key not in mapping:
- mapping[key] = value
- elif isinstance(value, list):
- mapping[key] += value
- return mapping[key]
-
-hierarchy = dict()
-
-# iterate over NUTS-3 codelist and recursively add items to the hierarchy dict
-for n3, mapping in nuts3_codelist.items():
- country, n1, n2 = mapping['country'], mapping['nuts1'], mapping['nuts2']
- country_dict = add_to(hierarchy, country, {n1: dict()})
- n1_dict = add_to(country_dict, n1, {n2: list()})
- add_to(n1_dict, n2, [n3])
+>>> import nomenclature as nc
+>>> nc.nuts_hierarchy['Belgium']['BE2']['BE24']
+['BE241', 'BE242']
+>>> nc.regions['BE241']['name']
+'Arr. Halle-Vilvoorde'
```
### Other sub-country area classification
diff --git a/region/aggregate-regions.yaml b/nomenclature/definitions/region/aggregate-regions.yaml
similarity index 100%
rename from region/aggregate-regions.yaml
rename to nomenclature/definitions/region/aggregate-regions.yaml
diff --git a/region/countries.yaml b/nomenclature/definitions/region/countries.yaml
similarity index 100%
rename from region/countries.yaml
rename to nomenclature/definitions/region/countries.yaml
diff --git a/region/data/countries.csv b/nomenclature/definitions/region/data/countries.csv
similarity index 100%
rename from region/data/countries.csv
rename to nomenclature/definitions/region/data/countries.csv
diff --git a/region/data/write-countries.py b/nomenclature/definitions/region/data/write-countries.py
similarity index 100%
rename from region/data/write-countries.py
rename to nomenclature/definitions/region/data/write-countries.py
diff --git a/region/data/write-nuts.py b/nomenclature/definitions/region/data/write-nuts.py
similarity index 100%
rename from region/data/write-nuts.py
rename to nomenclature/definitions/region/data/write-nuts.py
diff --git a/region/nuts1.yaml b/nomenclature/definitions/region/nuts1.yaml
similarity index 100%
rename from region/nuts1.yaml
rename to nomenclature/definitions/region/nuts1.yaml
diff --git a/region/nuts2.yaml b/nomenclature/definitions/region/nuts2.yaml
similarity index 100%
rename from region/nuts2.yaml
rename to nomenclature/definitions/region/nuts2.yaml
diff --git a/region/nuts3.yaml b/nomenclature/definitions/region/nuts3.yaml
similarity index 100%
rename from region/nuts3.yaml
rename to nomenclature/definitions/region/nuts3.yaml
diff --git a/region/subcountries.yaml b/nomenclature/definitions/region/subcountries.yaml
similarity index 100%
rename from region/subcountries.yaml
rename to nomenclature/definitions/region/subcountries.yaml
diff --git a/scenario/README.md b/nomenclature/definitions/scenario/README.md
similarity index 100%
rename from scenario/README.md
rename to nomenclature/definitions/scenario/README.md
diff --git a/subannual/README.md b/nomenclature/definitions/subannual/README.md
similarity index 90%
rename from subannual/README.md
rename to nomenclature/definitions/subannual/README.md
index 091d4c39..a669242c 100644
--- a/subannual/README.md
+++ b/nomenclature/definitions/subannual/README.md
@@ -21,6 +21,11 @@ representative periods (e.g., "summer-day").
Each item in the codelists below includes an attribute `duration` indicating
the duration relative to a normal year (i.e., not a leap year).
+### Yearly data
+
+The default entry for the openENTRANCE data format in the "subannual" column
+is "Year". Its `duration` attribute is set to `1`.
+
### Months
See [months.yaml](months.yaml) for the codelist.
diff --git a/subannual/months.yaml b/nomenclature/definitions/subannual/months.yaml
similarity index 100%
rename from subannual/months.yaml
rename to nomenclature/definitions/subannual/months.yaml
diff --git a/nomenclature/definitions/subannual/year.yaml b/nomenclature/definitions/subannual/year.yaml
new file mode 100644
index 00000000..4e6a28fb
--- /dev/null
+++ b/nomenclature/definitions/subannual/year.yaml
@@ -0,0 +1,4 @@
+# Default entry for the subannual column
+
+Year:
+ duration: 1
\ No newline at end of file
diff --git a/variable/README.md b/nomenclature/definitions/variable/README.md
similarity index 100%
rename from variable/README.md
rename to nomenclature/definitions/variable/README.md
diff --git a/variable/data/variables_iamc15.csv b/nomenclature/definitions/variable/data/variables_iamc15.csv
similarity index 100%
rename from variable/data/variables_iamc15.csv
rename to nomenclature/definitions/variable/data/variables_iamc15.csv
diff --git a/variable/data/write-iamc15c-variables.py b/nomenclature/definitions/variable/data/write-iamc15c-variables.py
similarity index 100%
rename from variable/data/write-iamc15c-variables.py
rename to nomenclature/definitions/variable/data/write-iamc15c-variables.py
diff --git a/variable/economy/README.md b/nomenclature/definitions/variable/economy/README.md
similarity index 100%
rename from variable/economy/README.md
rename to nomenclature/definitions/variable/economy/README.md
diff --git a/variable/economy/economy.yaml b/nomenclature/definitions/variable/economy/economy.yaml
similarity index 100%
rename from variable/economy/economy.yaml
rename to nomenclature/definitions/variable/economy/economy.yaml
diff --git a/variable/emissions/README.md b/nomenclature/definitions/variable/emissions/README.md
similarity index 100%
rename from variable/emissions/README.md
rename to nomenclature/definitions/variable/emissions/README.md
diff --git a/variable/emissions/emissions.yaml b/nomenclature/definitions/variable/emissions/emissions.yaml
similarity index 100%
rename from variable/emissions/emissions.yaml
rename to nomenclature/definitions/variable/emissions/emissions.yaml
diff --git a/variable/energy/README.md b/nomenclature/definitions/variable/energy/README.md
similarity index 100%
rename from variable/energy/README.md
rename to nomenclature/definitions/variable/energy/README.md
diff --git a/variable/energy/energy-final.yaml b/nomenclature/definitions/variable/energy/energy-final.yaml
similarity index 100%
rename from variable/energy/energy-final.yaml
rename to nomenclature/definitions/variable/energy/energy-final.yaml
diff --git a/variable/energy/energy-primary.yaml b/nomenclature/definitions/variable/energy/energy-primary.yaml
similarity index 100%
rename from variable/energy/energy-primary.yaml
rename to nomenclature/definitions/variable/energy/energy-primary.yaml
diff --git a/variable/energy/energy-secondary.yaml b/nomenclature/definitions/variable/energy/energy-secondary.yaml
similarity index 100%
rename from variable/energy/energy-secondary.yaml
rename to nomenclature/definitions/variable/energy/energy-secondary.yaml
diff --git a/variable/technology/README.md b/nomenclature/definitions/variable/technology/README.md
similarity index 100%
rename from variable/technology/README.md
rename to nomenclature/definitions/variable/technology/README.md
diff --git a/variable/technology/electricity-grid.yaml b/nomenclature/definitions/variable/technology/electricity-grid.yaml
similarity index 100%
rename from variable/technology/electricity-grid.yaml
rename to nomenclature/definitions/variable/technology/electricity-grid.yaml
diff --git a/variable/technology/power-plant.yaml b/nomenclature/definitions/variable/technology/power-plant.yaml
similarity index 100%
rename from variable/technology/power-plant.yaml
rename to nomenclature/definitions/variable/technology/power-plant.yaml
diff --git a/variable/technology/technologies.yaml b/nomenclature/definitions/variable/technology/technologies.yaml
similarity index 100%
rename from variable/technology/technologies.yaml
rename to nomenclature/definitions/variable/technology/technologies.yaml
diff --git a/tests/test_init.py b/nomenclature/tests/_test_parse_yaml.py
similarity index 96%
rename from tests/test_init.py
rename to nomenclature/tests/_test_parse_yaml.py
index c7a6fa3b..e798fb53 100644
--- a/tests/test_init.py
+++ b/nomenclature/tests/_test_parse_yaml.py
@@ -16,4 +16,4 @@ def test_parse_yaml_files():
print(f"Error parsing file `{file}`\n{e}\n")
# tests fails if any file cannot be parsed, show list of these files
- assert not lst
\ No newline at end of file
+ assert not lst
diff --git a/nomenclature/tests/test_core.py b/nomenclature/tests/test_core.py
new file mode 100644
index 00000000..4ebb6872
--- /dev/null
+++ b/nomenclature/tests/test_core.py
@@ -0,0 +1,22 @@
+import nomenclature as nc
+
+
+def test_variables():
+ # check that variables dictionary is not empty and has specific element
+ assert 'Emissions|CO2' in nc.variables
+
+
+def test_regions():
+ # check that regions dictionary is not empty and has specific element
+ # (the dictionary is built from the region yaml files on package import)
+ assert 'Europe' in nc.regions
+
+
+def test_iso_mapping():
+    """Check that iso2, iso3 and alternative iso2 codes map to the country"""
+    greek_codes = ('GR', 'GRC', 'EL')
+    for code in greek_codes:
+        assert nc.iso_mapping[code] == 'Greece'
+
+
+def test_nuts_hierarchy():
+ # check that nuts-hierarchy is not empty and has specific elements
+ # (lookup order is country -> NUTS-1 -> NUTS-2 -> list of NUTS-3 codes)
+ assert nc.nuts_hierarchy['Belgium']['BE2']['BE24'] == ['BE241', 'BE242']
diff --git a/requirements.txt b/requirements.txt
index c3726e8b..927e9032 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
pyyaml
+pyam-iamc # the pyam package is released on pypi under this name
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..cd972262
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,23 @@
+[metadata]
+name = nomenclature
+author = openENTRANCE consortium
+author_email = huppmann@iiasa.ac.at
+license = Apache License 2.0
+description = Model linkage nomenclature for the openENTRANCE project
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/openENTRANCE/nomenclature
+
+[options]
+packages = nomenclature
+include_package_data = True
+# pyam is imported by `nomenclature/__init__.py`
+# (the package is released on pypi under the name pyam-iamc)
+install_requires =
+    setuptools >= 41
+    pyyaml
+    pyam-iamc
+setup_requires =
+    setuptools >= 41
+    setuptools_scm
+
+# the key is the name of the package, the patterns are relative to its folder;
+# the yaml codelists are read from `definitions` when the package is imported
+[options.package_data]
+nomenclature =
+    definitions/*/*.yaml
+    definitions/*/*/*.yaml
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..d5d43d7c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,3 @@
+from setuptools import setup
+
+# metadata and options are declared in `setup.cfg`;
+# `use_scm_version` lets setuptools_scm determine the package version
+setup(use_scm_version=True)