diff --git a/.github/workflows/python_tests_and_linting.yml b/.github/workflows/python_tests_and_linting.yml
new file mode 100644
index 0000000..5fb5ebd
--- /dev/null
+++ b/.github/workflows/python_tests_and_linting.yml
@@ -0,0 +1,42 @@
+name: test_automation
+
+on:
+ pull_request:
+ branches:
+ - develop
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ strategy:
+ matrix:
+ python-version: [3.10.12] # Versions of Python to test against
+
+ steps:
+ - name: Check out repository code
+ uses: actions/checkout@v2
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Python dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install poetry
+ poetry install # Ensure all dependencies are installed
+
+ - name: Run tests
+ if: always()
+ run: |
+ poetry run python -m unittest discover
+
+ - name: Lint python code with flake8
+ if: always()
+ run: |
+ poetry run flake8 .
+
+
+
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..93335b2
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,65 @@
+name: Release
+
+on:
+ release:
+ types: [created]
+
+jobs:
+ build-and-deploy-python:
+ name: Build and Deploy Python Package
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.10.12'
+
+ - name: Install Poetry
+ run: |
+ curl -sSL https://install.python-poetry.org | python3 -
+
+ - name: Install dependencies
+ run: poetry install
+
+ - name: Build package
+ run: poetry build
+
+ - name: Publish package to PyPI
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+ run: poetry publish --username $TWINE_USERNAME --password $TWINE_PASSWORD
+
+ build-and-deploy-r:
+ name: Build and Deploy R Package
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v2
+
+ - name: Set up R
+ uses: r-lib/actions/setup-r@v2
+
+ - name: Install dependencies
+ run: |
+ Rscript -e 'install.packages("devtools")'
+
+ - name: Build package
+ run: |
+ R CMD build .
+
+ - name: Check package
+ run: |
+ R CMD check *.tar.gz
+
+ - name: Publish package to CRAN
+ run: |
+ Rscript -e 'devtools::release()'
+ env:
+ CRAN_USERNAME: ${{ secrets.CRAN_USERNAME }}
+ CRAN_PASSWORD: ${{ secrets.CRAN_PASSWORD }}
diff --git a/.github/workflows/update_documentation.yml b/.github/workflows/update_documentation.yml
new file mode 100644
index 0000000..272a091
--- /dev/null
+++ b/.github/workflows/update_documentation.yml
@@ -0,0 +1,70 @@
+name: Deploy Documentation
+
+on:
+ pull_request:
+ branches:
+ - develop
+ types:
+ - closed
+
+jobs:
+ build-and-deploy:
+ if: github.event.pull_request.merged == true
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.10.12' # Use Python 3.10.12
+
+ - name: Set up R
+ uses: r-lib/actions/setup-r@v2
+
+ - name: Install system dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y libcurl4-openssl-dev libssl-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev
+
+ - name: Install Python dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install poetry
+ poetry install # Ensure all dependencies are installed
+
+ - name: Install IRkernel
+ run: |
+ pip install jupyter
+ R -e 'install.packages(c("renv", "IRkernel"))'
+ R -e 'IRkernel::installspec(user = TRUE)'
+
+ - name: Install R dependencies
+ run: |
+ R -e "renv::restore()"
+
+ - uses: actions/checkout@v2
+ - name: Configure Git Credentials
+ run: |
+ git config user.name github-actions[bot]
+ git config user.email 41898282+github-actions[bot]@users.noreply.github.com
+ - uses: actions/setup-python@v5
+ with:
+ python-version: 3.x
+ - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+
+
+ - uses: actions/cache@v2
+ with:
+ key: mkdocs-material-${{ env.cache_id }}
+ path: .cache
+ restore-keys: |
+ mkdocs-material-
+      - name: Build and deploy documentation
+ run: |
+ make
+ git fetch origin gh-pages --depth=1
+ poetry run mike set-default develop
+ poetry run mike deploy develop -p
diff --git a/DESCRIPTION b/DESCRIPTION
index 99157f4..a239e32 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -3,7 +3,7 @@ Title: POSTED: Potsdam open-source techno-economic database
Version: 0.3.0
Authors@R: c(
person(given = "Philipp C.", family = "Verpoort", email= "philipp.verpoort@pik-potsdam.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-1319-5006")),
- person(given = "Leo", family = "Heidweiler", role = "aut")),
+ person(given = "Leo", family = "Heidweiler", role = "aut"),
person(given = "Paul", family = "Effing", role = "aut"))
Description: POSTED is a consistent framework, public database, and open-source toolbox of techno-economic data of energy and climate-mitigation technologies. In particular, it provides a structure and contains actual data on capital expenditure, operational expenditure, energy and feedstock demand, emissions intensities, and other characteristics of conversion, storage, and transportation technologies in the energy and related sectors. The accompanying software code is intended for consistent maintenance of this data and for deriving straight-forward results from them, such as levelised cost, levelised emissions intensities, or marginal abatement cost.
License: MIT
@@ -22,3 +22,6 @@ Collate:
'masking.R'
'tedf.R'
'noslag.R'
+Suggests:
+ testthat (>= 3.0.0)
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..e06a3f2
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,20 @@
+# Generated by roxygen2: do not edit by hand
+
+export(AbstractFieldDefinition)
+export(DataSet)
+export(Mask)
+export(TEBase)
+export(TEDF)
+export(apply_cond)
+export(collect_files)
+export(combine_units)
+export(is_float)
+export(normalise_units)
+export(normalise_values)
+export(read_csv_file)
+export(read_definitions)
+export(read_masks)
+export(read_yml_file)
+export(replace_tags)
+export(unit_convert)
+export(unit_token_func)
diff --git a/docs/tutorials/R/overview.py b/docs/tutorials/R/overview.py
index d2678f9..35dc5a7 100644
--- a/docs/tutorials/R/overview.py
+++ b/docs/tutorials/R/overview.py
@@ -1,66 +1,317 @@
+# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext:
+# formats: ipynb,py
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
-# jupytext_version: 1.16.1
+# jupytext_version: 1.16.2
# kernelspec:
# display_name: R
# language: R
# name: ir
# ---
+# # Overview of working with POSTED
+
+# ## Prerequisites
+
+# #### Dependencies
+
+# First, we load some general-purpose libraries. Here we use `IRdisplay` for displaying output, `plotly` for plotting, and `reshape2` for reshaping data; `posted` itself does not depend on these, and other tools could be used instead.
+
+# + vscode={"languageId": "r"}
+library(IRdisplay)
+library(plotly)
+library(reshape2)
+# -
+
+# #### Importing POSTED
+
+# The `posted` package has to be installed in the R environment. If it is not installed yet, you can install it from the GitHub source code, for instance with `devtools`; in this tutorial we simply load the package from the local source tree.
+
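+# As a rough sketch, installing from GitHub could look like the cell below; the repository path is a placeholder and should be replaced with the actual location of the POSTED source code.
+
+# + vscode={"languageId": "r"}
+# install.packages("devtools")                # only needed if devtools is missing
+# devtools::install_github("<owner>/posted")  # placeholder repository path
+# -
+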
# + vscode={"languageId": "r"}
devtools::load_all()
+# -
+
+# Check the working directory and move to the repository root so that relative paths resolve correctly.
+
+# + vscode={"languageId": "r"}
+getwd()
+setwd("../../../")
+
+# + vscode={"languageId": "r"}
+getwd()
+# -
+
+# Attach some basic data-wrangling and plotting packages for output analysis; the cells below rely on them (see the setup sketch that follows).
+
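+# The following cell is a minimal setup sketch: it attaches the `dplyr`, `tidyr`, and `ggplot2` packages that the pipe- and ggplot-based cells below make use of.
+
+# + vscode={"languageId": "r"}
+library(dplyr)    # mutate(), select(), arrange(), filter(), sample_n()
+library(tidyr)    # separate()
+library(ggplot2)  # ggplot()-based plotting
+# -
+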
+# ## NOSLAG
+
+# #### Electrolysis CAPEX
+
+# Let's compare electrolysis CAPEX data for the years 2020–2050, for alkaline (AEL) and PEM electrolysis, across different sources (Danish Energy Agency, Breyer, Fraunhofer, IRENA) and different electrolyser plant sizes.
+
+# + vscode={"languageId": "r"}
+# select data from TEDFs
+df_elh2 = DataSet$new('Tech|Electrolysis')$select(
+ period=list(2020, 2030, 2040, 2050),
+ subtech=list('AEL', 'PEM'),
+ override=list('Tech|Electrolysis|Output Capacity|Hydrogen'= 'kW;LHV'),
+ source=list('DEARF23', 'Vartiainen22', 'Holst21', 'IRENA22'),
+ size=list('1 MW', '5 MW', '100 MW'),
+ extrapolate_period=FALSE
+ ) %>% subset(variable=='Tech|Electrolysis|CAPEX')
+
+# display a few examples
+display(df_elh2 %>% sample_n(15))
+
+
+# sort data and plot
+df_elh2 <- df_elh2 %>%
+ separate(size, into = c("size_sort", "rest"), sep = " ", remove = FALSE) %>%
+ mutate(size_sort = as.integer(size_sort)) %>%
+ select(-rest) %>% arrange(size_sort, period)
+
+p <- ggplot(df_elh2, aes(x=period, y=value, color=source)) + geom_line() + facet_grid(vars(subtech), vars(size))
+
+p
+# -
+
+# Based on those many sources and cases (size and subtechnology), we can now aggregate the data for further use.
+
# + vscode={"languageId": "r"}
-par(bg = "white")
-plot(1:10)
+DataSet$new('Tech|Electrolysis')$aggregate(
+ period=list(2020, 2030, 2040, 2050),
+ subtech=list('AEL', 'PEM'),
+ override=list('Tech|Electrolysis|Output Capacity|Hydrogen'= 'kW;LHV'),
+ source=list('DEARF23', 'Vartiainen22', 'Holst21', 'IRENA22'),
+ size=list('1 MW', '5 MW', '100 MW'),
+ agg=list('subtech', 'size', 'source'),
+ extrapolate_period=FALSE,
+ ) %>% filter(variable %in% c('Tech|Electrolysis|CAPEX', 'Tech|Electrolysis|Output Capacity|Hydrogen'))
+
+ # .team.varsplit('Tech|Electrolysis|*variable') \
+ # .query(f"variable.isin({['CAPEX', 'Output Capacity|Hydrogen']})")
+# -
+
+# #### Energy demand of green vs. blue hydrogen production
+
+# Next, let's compare the energy demand of methane reforming (for blue hydrogen) and different types of electrolysis (for green hydrogen).
# + vscode={"languageId": "r"}
-tedf <- TEDF$new("Tech|Electrolysis")$load()
-tedf$data
+pd.concat([
+ DataSet('Tech|Methane Reforming').aggregate(period=2030, source='Lewis22'),
+ DataSet('Tech|Electrolysis').aggregate(period=2030, agg=['source', 'size']),
+ ]) \
+ .reset_index(drop=True) \
+ .team.varsplit('Tech|?tech|Input|?fuel') \
+    .assign(tech=lambda df: df.apply(lambda row: f"{row['tech']} ({row['subtech']})" if pd.isnull(row['capture_rate']) else f"{row['tech']} ({row['subtech']}, {row['capture_rate']} CR)", axis=1)) \
+ .plot.bar(x='tech', y='value', color='fuel') \
+ .update_layout(
+ xaxis_title='Technologies',
+ yaxis_title='Energy demand ( MWhLHV / MWhLHV H2 )',
+ legend_title='Energy carriers',
+ )
+# -
+
+# #### Energy demand of iron direct reduction
+
+# Next, let's compare the energy demand of iron direct reduction (production of low-carbon crude iron) across sources.
# + vscode={"languageId": "r"}
- DataSet$new('Tech|Electrolysis')$normalise(override=list('Tech|Electrolysis|Input Capacity|elec'= 'kW', 'Tech|Electrolysis|Output Capacity|h2'= 'kW;LHV')) %>% filter(source=='Vartiainen22')
+DataSet('Tech|Iron Direct Reduction') \
+ .aggregate(period=2030, mode='h2', agg=[]) \
+ .team.varsplit('Tech|Iron Direct Reduction|Input|?fuel') \
+ .query(f"fuel != 'Iron Ore'") \
+ .team.varcombine('{fuel} ({component})') \
+ .plot.bar(x='source', y='value', color='variable') \
+ .update_layout(
+ xaxis_title='Sources',
+ yaxis_title='Energy demand ( MWhLHV / tDRI )',
+ legend_title='Energy carriers'
+ )
+# -
+
+# We can also compare the energy demand for operation with hydrogen or with fossil gas for only one source.
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Electrolysis')$normalise(override=list('Tech|Electrolysis|Output Capacity|h2'= 'kW;LHV'))
+DataSet('Tech|Iron Direct Reduction') \
+ .select(period=2030, source='Jacobasch21') \
+ .team.varsplit('Tech|Iron Direct Reduction|Input|?fuel') \
+ .query(f"fuel.isin({['Electricity', 'Fossil Gas', 'Hydrogen']})") \
+ .plot.bar(x='mode', y='value', color='fuel') \
+ .update_layout(
+ xaxis_title='Mode of operation',
+ yaxis_title='Energy demand ( MWhLHV / tDRI )',
+ legend_title='Energy carriers'
+ )
+# -
+
+# #### Energy demand of Haber-Bosch synthesis
+
+# Finally, let's compare the energy demand of Haber-Bosch synthesis between an integrated SMR plant and a plant running on green hydrogen.
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Electrolysis')$select(period=2020, subtech='AEL', size='100 MW', override=list('Tech|Electrolysis|Output Capacity|h2'= 'kW;LHV'))
+pd.concat([
+ DataSet('Tech|Haber-Bosch with ASU').aggregate(period=2024, agg='component'),
+ DataSet('Tech|Haber-Bosch with Reforming').aggregate(period=2024, agg='component')
+ ]) \
+ .reset_index(drop=True) \
+ .team.varsplit('Tech|?tech|*variable') \
+ .query(f"variable.str.startswith('Input|')") \
+ .plot.bar(x='source', y='value', color='variable') \
+ .update_layout(
+ xaxis_title='Sources',
+ yaxis_title='Energy demand ( MWhLHV / tNH3 )',
+ legend_title='Energy carriers'
+ )
+# -
+
+# ## TEAM
+
+# #### CalcVariable
+
+# New variables can be calculated manually via the `CalcVariable` class. The next example demonstrates this for calculating the levelised cost of hydrogen.
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Electrolysis')$select(period=2030, source='Yates20', subtech='AEL', size='100 MW', override={'Tech|Electrolysis|Output Capacity|h2'= 'kW;LHV'}, extrapolate_period=FALSE)
+assumptions = pd.DataFrame.from_records([
+ {'elec_price_case': f"Case {i}", 'variable': 'Price|Electricity', 'unit': 'EUR_2020/MWh', 'value': 30 + (i-1)*25}
+ for i in range(1, 4)
+] + [
+ {'variable': 'Tech|Electrolysis|OCF', 'value': 50, 'unit': 'pct'},
+ {'variable': 'Annuity Factor', 'value': annuity_factor(Q('5 pct'), Q('18 a')).m, 'unit': '1/a'},
+])
+display(assumptions)
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Electrolysis')$select(subtech=c('AEL', 'PEM'), size='100 MW', override={'Tech|Electrolysis|Input Capacity|Electricity'= 'kW'})
+df_calc = pd.concat([
+ DataSet('Tech|Electrolysis').aggregate(period=[2030, 2040, 2050], subtech=['AEL', 'PEM'], agg=['size', 'source']),
+ assumptions,
+ ]).team.perform(CalcVariable(**{
+ 'LCOX|Green Hydrogen|Capital Cost': lambda x: (x['Annuity Factor'] * x['Tech|Electrolysis|CAPEX'] / x['Tech|Electrolysis|Output Capacity|Hydrogen'] / x['Tech|Electrolysis|OCF']),
+ 'LCOX|Green Hydrogen|OM Cost Fixed': lambda x: x['Tech|Electrolysis|OPEX Fixed'] / x['Tech|Electrolysis|Output Capacity|Hydrogen'] / x['Tech|Electrolysis|OCF'],
+ 'LCOX|Green Hydrogen|Input Cost|Electricity': lambda x: x['Price|Electricity'] * x['Tech|Electrolysis|Input|Electricity'] / x['Tech|Electrolysis|Output|Hydrogen'],
+ }), only_new=True) \
+ .team.unit_convert(to='EUR_2020/MWh')
+
+display(df_calc.sample(15).sort_index())
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Electrolysis')$aggregate(subtech='AEL', size='100 MW', agg='subtech', override={'Tech|Electrolysis|Output Capacity|Hydrogen'='kW;LHV'})
+df_calc.team.varsplit('LCOX|Green Hydrogen|?component') \
+ .sort_values(by=['elec_price_case', 'value']) \
+ .plot.bar(x='period', y='value', color='component', facet_col='elec_price_case', facet_row='subtech')
+# -
+
+# #### Pivot
+
+# POSTED uses the `pivot` dataframe method to bring the data into a usable format.
# + vscode={"languageId": "r"}
-# DataSet$new('Tech|Methane Reforming')$aggregate(period=2030).query("variable.str.contains('OM Cost')"))
-# display(DataSet('Tech|Methane Reforming').aggregate(period=2030).query("variable.str.contains('Demand')"))
-DataSet$new('Tech|Methane Reforming')$aggregate(period=2030) %>% arrange(variable)
+pd.concat([
+ DataSet('Tech|Electrolysis').aggregate(period=[2030, 2040, 2050], subtech=['AEL', 'PEM'], agg=['size', 'source']),
+ assumptions,
+ ]).team.pivot_wide().pint.dequantify()
+# -
+
+# #### LCOX of blue and green hydrogen
+
+# POSTED also contains predefined methods for calculating LCOX. Here we apply it to blue and green hydrogen.
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Direct Air Capture')$normalise()
+df_lcox_bluegreen = pd.concat([
+ pd.DataFrame.from_records([
+ {'elec_price_case': f"Case {i}", 'variable': 'Price|Electricity', 'unit': 'EUR_2020/MWh', 'value': 30 + (i-1)*25}
+ for i in range(1, 4)
+ ]),
+ pd.DataFrame.from_records([
+ {'ng_price_case': 'High' if i-1 else 'Low', 'variable': 'Price|Fossil Gas', 'unit': 'EUR_2020/MWh', 'value': 40 if i-1 else 20}
+ for i in range(1, 3)
+ ]),
+ DataSet('Tech|Electrolysis').aggregate(period=2030, subtech=['AEL', 'PEM'], agg=['size', 'subtech', 'source']),
+ DataSet('Tech|Methane Reforming').aggregate(period=2030, capture_rate=['55.70%', '94.50%'])
+ .team.varsplit('Tech|Methane Reforming|*comp')
+ .team.varcombine('{variable} {subtech} ({capture_rate})|{comp}')
+ ]) \
+ .team.perform(
+ LCOX('Output|Hydrogen', 'Electrolysis', name='Green Hydrogen', interest_rate=0.1, book_lifetime=18),
+ LCOX('Output|Hydrogen', 'Methane Reforming SMR (55.70%)', name='Blue Hydrogen (Low CR)', interest_rate=0.1, book_lifetime=18),
+ LCOX('Output|Hydrogen', 'Methane Reforming ATR (94.50%)', name='Blue Hydrogen (High CR)', interest_rate=0.1, book_lifetime=18),
+ only_new=True,
+ ) \
+ .team.unit_convert(to='EUR_2022/MWh')
+
+display(df_lcox_bluegreen)
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Direct Air Capture')$select()
+df_lcox_bluegreen.team.varsplit('LCOX|?fuel|*comp') \
+ .plot.bar(x='fuel', y='value', color='comp', facet_col='elec_price_case', facet_row='ng_price_case')
+# -
+
+# #### LCOX of Methanol
+
+# Let's calculate the levelised cost of green methanol (from electrolytic hydrogen). First we can do this simply based on a hydrogen price (i.e. without accounting for electrolysis).
# + vscode={"languageId": "r"}
-TEDF$new('Tech|Haber-Bosch with ASU')$load()# $check()
-DataSet$new('Tech|Haber-Bosch with ASU')$normalise()
+df_lcox_meoh = pd.concat([
+ DataSet('Tech|Methanol Synthesis').aggregate(period=[2030, 2050]),
+ pd.DataFrame.from_records([
+ {'period': 2030, 'variable': 'Price|Hydrogen', 'unit': 'EUR_2022/MWh', 'value': 120},
+ {'period': 2050, 'variable': 'Price|Hydrogen', 'unit': 'EUR_2022/MWh', 'value': 80},
+ {'period': 2030, 'variable': 'Price|Captured CO2', 'unit': 'EUR_2022/t', 'value': 150},
+ {'period': 2050, 'variable': 'Price|Captured CO2', 'unit': 'EUR_2022/t', 'value': 100},
+ ]),
+ ]) \
+ .team.perform(LCOX(
+ 'Output|Methanol', 'Methanol Synthesis', name='Green Methanol',
+ interest_rate=0.1, book_lifetime=10.0), only_new=True,
+ ) \
+ .team.unit_convert('EUR_2022/MWh')
+
+display(df_lcox_meoh)
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Haber-Bosch with ASU')$select(period=2020)
+df_lcox_meoh.team.varsplit('LCOX|Green Methanol|*component') \
+ .plot.bar(x='period', y='value', color='component')
+# -
+
+# Next, we can calculate the LCOX of green methanol for the value chain consisting of electrolysis, low-temperature direct air capture, and methanol synthesis. The heat for the direct air capture will be provided by an industrial heat pump.
# + vscode={"languageId": "r"}
-DataSet$new('Tech|Haber-Bosch with ASU')$aggregate(period=2020)
+pc = ProcessChain(
+ 'Green Methanol',
+ {'Methanol Synthesis': {'Methanol': Q('1 MWh')}},
+ 'Heatpump for DAC -> Heat => Direct Air Capture -> Captured CO2 => Methanol Synthesis;Electrolysis -> Hydrogen => Methanol Synthesis -> Methanol',
+)
+
+g, lay = pc.igraph()
+fig, ax = plt.subplots()
+ax.set_title(pc.name)
+ig.plot(g, target=ax, layout=lay, vertex_label=[n.replace(' ', '\n') for n in g.vs['name']], edge_label=[n.replace(' ', '\n') for n in g.es['name']], vertex_label_size=8, edge_label_size=6)
# + vscode={"languageId": "r"}
+df_lcox_meohvc = pd.concat([
+ DataSet('Tech|Electrolysis').aggregate(period=[2030, 2050], subtech=['AEL', 'PEM'], size=['1 MW', '100 MW'], agg=['subtech', 'size', 'source']),
+ DataSet('Tech|Direct Air Capture').aggregate(period=[2030, 2050], subtech='LT-DAC'),
+ DataSet('Tech|Heatpump for DAC').aggregate(period=[2030, 2050]),
+ DataSet('Tech|Methanol Synthesis').aggregate(period=[2030, 2050]),
+ pd.DataFrame.from_records([
+ {'period': 2030, 'variable': 'Price|Electricity', 'unit': 'EUR_2022/MWh', 'value': 50},
+ {'period': 2050, 'variable': 'Price|Electricity', 'unit': 'EUR_2022/MWh', 'value': 30},
+ ]),
+ ]) \
+ .team.perform(pc) \
+ .team.perform(LCOX(
+ 'Methanol Synthesis|Output|Methanol', process_chain='Green Methanol',
+ interest_rate=0.1, book_lifetime=10.0,
+ ), only_new=True) \
+ .team.unit_convert('EUR_2022/MWh')
+
+display(df_lcox_meohvc)
+# + vscode={"languageId": "r"}
+df_lcox_meohvc.team.varsplit('LCOX|Green Methanol|?process|*component') \
+ .plot.bar(x='period', y='value', color='component', hover_data='process')
diff --git a/make_r_docs.R b/make_r_docs.R
index bddc5d8..ee9f2f8 100644
--- a/make_r_docs.R
+++ b/make_r_docs.R
@@ -69,7 +69,7 @@ rd_file_list <- dir_ls("man", type = "file")
for (rd_file in rd_file_list) {
# remove the man/ prefix and the .Rd
name <- sub("^man/(.*)\\.Rd$", "\\1", rd_file)
- system(paste('rd2md man', function_file_path, name))
+ system(paste('poetry run rd2md man', function_file_path, name))
# read in the markdown file of the function/class
function_markdown <- readLines(paste0(function_file_path, name, ".md"))
diff --git a/makefile b/makefile
index a076649..b67bfbc 100644
--- a/makefile
+++ b/makefile
@@ -15,7 +15,7 @@ all: python_docs r_docs
# Define the targets
python_docs: $(R_FILES) $(PYTHON_POSTED_FILES)
- python $(PYTHON_DOC_SCRIPT)
+ poetry run python $(PYTHON_DOC_SCRIPT)
r_docs: $(R_FILES) $(PYTHON_POSTED_FILES)
Rscript $(R_DOC_SCRIPT)
diff --git a/poetry.lock b/poetry.lock
index 67c9d20..85c97d9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "appnope"
@@ -501,6 +501,22 @@ files = [
[package.extras]
devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
+[[package]]
+name = "flake8"
+version = "7.1.0"
+description = "the modular source code checker: pep8 pyflakes and co"
+optional = false
+python-versions = ">=3.8.1"
+files = [
+ {file = "flake8-7.1.0-py2.py3-none-any.whl", hash = "sha256:2e416edcc62471a64cea09353f4e7bdba32aeb079b6e360554c659a122b1bc6a"},
+ {file = "flake8-7.1.0.tar.gz", hash = "sha256:48a07b626b55236e0fb4784ee69a465fbf59d79eec1f5b4785c3d3bc57d17aa5"},
+]
+
+[package.dependencies]
+mccabe = ">=0.7.0,<0.8.0"
+pycodestyle = ">=2.12.0,<2.13.0"
+pyflakes = ">=3.2.0,<3.3.0"
+
[[package]]
name = "fonttools"
version = "4.53.0"
@@ -1265,6 +1281,17 @@ files = [
[package.dependencies]
traitlets = "*"
+[[package]]
+name = "mccabe"
+version = "0.7.0"
+description = "McCabe checker, plugin for flake8"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
+ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+]
+
[[package]]
name = "mdit-py-plugins"
version = "0.4.1"
@@ -2065,6 +2092,17 @@ files = [
{file = "pybtex_apa_style-1.3-py3-none-any.whl", hash = "sha256:d4433acd5a6ddf37489f0f8e0e4e1e8f71df4f3acd98628ea61994e89df96caf"},
]
+[[package]]
+name = "pycodestyle"
+version = "2.12.0"
+description = "Python style guide checker"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pycodestyle-2.12.0-py2.py3-none-any.whl", hash = "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4"},
+ {file = "pycodestyle-2.12.0.tar.gz", hash = "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c"},
+]
+
[[package]]
name = "pycparser"
version = "2.22"
@@ -2076,6 +2114,17 @@ files = [
{file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
]
+[[package]]
+name = "pyflakes"
+version = "3.2.0"
+description = "passive checker of Python programs"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"},
+ {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"},
+]
+
[[package]]
name = "pygments"
version = "2.18.0"
@@ -2878,4 +2927,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
-content-hash = "3963e28fd4a41d47e7991c95e2e67ad078c5ae0d0d1eb497b4839aa8d00418a0"
+content-hash = "4558a0347bcd0aad1d7e9da48d07e9116576f3c0c7cfc7d2537df78de0db0a4e"
diff --git a/pyproject.toml b/pyproject.toml
index 3a2cd21..35d1b3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ igraph = "^0.11.3"
matplotlib = "^3.8.2"
openpyxl = "^3.1.2"
itables = "^2.1.1"
+flake8 = "^7.1.0"
[tool.poetry.group.documentation.dependencies]
mkdocs-material = "^9.5.26"
diff --git a/python/posted/__init__.py b/python/posted/__init__.py
index 8d1c8b6..e69de29 100644
--- a/python/posted/__init__.py
+++ b/python/posted/__init__.py
@@ -1 +0,0 @@
-
diff --git a/python/posted/cmdline.py b/python/posted/cmdline.py
index 2830624..3ce8079 100755
--- a/python/posted/cmdline.py
+++ b/python/posted/cmdline.py
@@ -12,9 +12,11 @@ def main():
parser = argparse.ArgumentParser(
prog='posted',
description='Potsdam open-source techno-economic database',
- epilog='For further details, please consult the source code or code documentation.',
+        epilog='For further details, please consult the source code or '
+               'code documentation.',
)
- subparsers = parser.add_subparsers(title='commands', dest='command', help='sub-command help')
+ subparsers = parser.add_subparsers(title='commands', dest='command',
+ help='sub-command help')
# create the parser for the "conv" command
parser_conv = subparsers.add_parser('conv')
diff --git a/python/posted/columns.py b/python/posted/columns.py
index cad1a91..aaec419 100644
--- a/python/posted/columns.py
+++ b/python/posted/columns.py
@@ -11,8 +11,8 @@
def is_float(string: str) -> bool:
- '''Checks if a given string can be converted to a floating-point number in
- Python.
+ """Checks if a given string can be converted to a floating-point
+ number in Python.
Parameters
----------
@@ -23,7 +23,7 @@ def is_float(string: str) -> bool:
-------
bool
True if conversion was successful, False if not
- '''
+ """
try:
float(string)
return True
@@ -32,7 +32,7 @@ def is_float(string: str) -> bool:
class AbstractColumnDefinition:
- '''
+ """
Abstract class to store columns
Parameters
@@ -52,18 +52,29 @@ class AbstractColumnDefinition:
-------
is_allowed
Check if cell is allowed
- '''
- def __init__(self, col_type: str, name: str, description: str, dtype: str, required: bool):
+ """
+
+ def __init__(self,
+ col_type: str,
+ name: str,
+ description: str,
+ dtype: str,
+ required: bool):
if col_type not in ['field', 'variable', 'unit', 'value', 'comment']:
- raise Exception(f"Columns must be of type field, variable, unit, value, or comment but found: {col_type}")
+ raise Exception(f"Columns must be of type field, variable, unit, "
+ f"value, or comment but found: {col_type}")
if not isinstance(name, str):
- raise Exception(f"The 'name' must be a string but found type {type(name)}: {name}")
+ raise Exception(f"The 'name' must be a string but found type "
+ f"{type(name)}: {name}")
if not isinstance(description, str):
- raise Exception(f"The 'name' must be a string but found type {type(description)}: {description}")
+ raise Exception(f"The 'name' must be a string but found type "
+ f"{type(description)}: {description}")
if not (isinstance(dtype, str) and dtype in ['float', 'str', 'category']):
- raise Exception(f"The 'dtype' must be a valid data type but found: {dtype}")
+ raise Exception(f"The 'dtype' must be a valid data type but "
+ f"found: {dtype}")
if not isinstance(required, bool):
- raise Exception(f"The 'required' argument must be a bool but found: {required}")
+ raise Exception(f"The 'required' argument must be a bool but "
+ f"found: {required}")
self._col_type: str = col_type
self._name: str = name
@@ -73,36 +84,36 @@ def __init__(self, col_type: str, name: str, description: str, dtype: str, requi
@property
def col_type(self):
- '''Get col type'''
+ """Get col type"""
return self._col_type
@property
def name(self):
- '''Get name of the column'''
+ """Get name of the column"""
return self._name
@property
def description(self):
- '''Get description of the column'''
+ """Get description of the column"""
return self._description
@property
def dtype(self):
- '''Get data type of the column'''
+ """Get data type of the column"""
return self._dtype
@property
def required(self):
- '''Return if column is required'''
+ """Return if column is required"""
return self._required
@property
def default(self):
- '''Get default value of the column'''
+ """Get default value of the column"""
return np.nan
def is_allowed(self, cell: str | float | int) -> bool:
- '''Check if Cell is allowed
+ """Check if Cell is allowed
Parameters
----------
@@ -112,12 +123,12 @@ def is_allowed(self, cell: str | float | int) -> bool:
-------
bool
If the cell is allowed
- '''
+ """
return True
class VariableDefinition(AbstractColumnDefinition):
- '''
+ """
Class to store variable columns
Parameters
@@ -135,7 +146,8 @@ class VariableDefinition(AbstractColumnDefinition):
-------
is_allowed
Check if cell is allowed
- '''
+ """
+
def __init__(self, name: str, description: str, required: bool):
super().__init__(
col_type='variable',
@@ -152,7 +164,7 @@ def is_allowed(self, cell: str | float | int) -> bool:
class UnitDefinition(AbstractColumnDefinition):
- '''
+ """
Class to store Unit columns
Parameters
@@ -170,7 +182,8 @@ class UnitDefinition(AbstractColumnDefinition):
-------
is_allowed
Check if cell is allowed
- '''
+ """
+
def __init__(self, name: str, description: str, required: bool):
super().__init__(
col_type='unit',
@@ -195,7 +208,7 @@ def is_allowed(self, cell: str | float | int) -> bool:
class ValueDefinition(AbstractColumnDefinition):
- '''
+ """
Class to store Value columns
Parameters
@@ -213,7 +226,8 @@ class ValueDefinition(AbstractColumnDefinition):
-------
is_allowed
Check if cell is allowed
- '''
+ """
+
def __init__(self, name: str, description: str, required: bool):
super().__init__(
col_type='value',
@@ -230,7 +244,7 @@ def is_allowed(self, cell: str | float | int) -> bool:
class CommentDefinition(AbstractColumnDefinition):
- '''
+ """
Class to store comment columns
Parameters
@@ -248,7 +262,8 @@ class CommentDefinition(AbstractColumnDefinition):
-------
is_allowed
Check if cell is allowed
- '''
+ """
+
def __init__(self, name: str, description: str, required: bool):
super().__init__(
col_type='comment',
@@ -263,7 +278,7 @@ def is_allowed(self, cell: str | float | int) -> bool:
class AbstractFieldDefinition(AbstractColumnDefinition):
- '''
+ """
Abstract class to store fields
Parameters
@@ -289,7 +304,8 @@ class AbstractFieldDefinition(AbstractColumnDefinition):
select_and_expand
Select and expand fields
- '''
+ """
+
def __init__(self, field_type: str, name: str, description: str, dtype: str, coded: bool,
codes: Optional[dict[str, str]] = None):
if field_type not in ['case', 'component']:
@@ -308,26 +324,26 @@ def __init__(self, field_type: str, name: str, description: str, dtype: str, cod
@property
def field_type(self) -> str:
- '''Get field type'''
+ """Get field type"""
return self._field_type
@property
def coded(self) -> bool:
- '''Return if field is coded'''
+ """Return if field is coded"""
return self._coded
@property
def codes(self) -> None | dict[str, str]:
- '''Get field codes'''
+ """Get field codes"""
return self._codes
@property
def default(self):
- '''Get symbol for default value'''
+ """Get symbol for default value"""
return '*' if self._field_type == 'case' else '#'
def is_allowed(self, cell: str | float | int) -> bool:
- ''' Chek if cell is allowed'''
+ """ Chek if cell is allowed"""
if pd.isnull(cell):
return False
if self._coded:
@@ -348,9 +364,8 @@ def _select(self, df: pd.DataFrame, col_id: str, field_vals: list, **kwargs):
# Select fields
return df.query(f"{col_id}.isin({field_vals})").reset_index(drop=True)
-
def select_and_expand(self, df: pd.DataFrame, col_id: str, field_vals: None | list, **kwargs) -> pd.DataFrame:
- '''
+ """
Select and expand fields which are valid for multiple periods or other field vals
Parameters
@@ -369,7 +384,7 @@ def select_and_expand(self, df: pd.DataFrame, col_id: str, field_vals: None | li
pd.DataFrame
Dataframe where fields are selected and expanded
- '''
+ """
# get list of selected field values
if field_vals is None:
if col_id == 'period':
@@ -377,7 +392,10 @@ def select_and_expand(self, df: pd.DataFrame, col_id: str, field_vals: None | li
elif self._coded:
field_vals = list(self._codes.keys())
else:
- field_vals = [v for v in df[col_id].unique() if v != '*' and not pd.isnull(v)]
+ field_vals = [
+ v for v in df[col_id].unique()
+ if v != '*' and not pd.isnull(v)
+ ]
else:
# ensure that field values is a list of elements (not tuple, not single value)
if isinstance(field_vals, tuple):
@@ -387,11 +405,12 @@ def select_and_expand(self, df: pd.DataFrame, col_id: str, field_vals: None | li
# check that every element is of allowed type
for val in field_vals:
if not self.is_allowed(val):
- raise Exception(f"Invalid type selected for field '{col_id}': {val}")
+ raise Exception(f"Invalid type selected for field "
+ f"'{col_id}': {val}")
if '*' in field_vals:
- raise Exception(f"Selected values for field '{col_id}' must not contain the asterisk."
- f"Omit the '{col_id}' argument to select all entries.")
-
+ raise Exception(f"Selected values for field '{col_id}' must "
+ f"not contain the asterisk. Omit the "
+ f"'{col_id}' argument to select all entries.")
df = self._expand(df, col_id, field_vals, **kwargs)
df = self._select(df, col_id, field_vals, **kwargs)
@@ -400,7 +419,7 @@ def select_and_expand(self, df: pd.DataFrame, col_id: str, field_vals: None | li
class RegionFieldDefinition(AbstractFieldDefinition):
- '''
+ """
Class to store Region fields
Parameters
@@ -409,21 +428,23 @@ class RegionFieldDefinition(AbstractFieldDefinition):
Name of the field
description: str
Description of the field
- '''
+ """
+
def __init__(self, name: str, description: str):
- '''Initialize parent class'''
+ """Initialize parent class"""
super().__init__(
field_type='case',
name=name,
description=description,
dtype='category',
coded=True,
- codes={'World': 'World'}, # TODO: Insert list of country names here.
+ # TODO: Insert list of country names here.
+ codes={'World': 'World'},
)
class PeriodFieldDefinition(AbstractFieldDefinition):
- '''
+ """
Class to store Period fields
Parameters
@@ -437,9 +458,10 @@ class PeriodFieldDefinition(AbstractFieldDefinition):
-------
is_allowed
Checks if cell is allowed
- '''
+ """
+
def __init__(self, name: str, description: str):
- '''Initialize parent class'''
+ """Initialize parent class"""
super().__init__(
field_type='case',
name=name,
@@ -449,7 +471,7 @@ def __init__(self, name: str, description: str):
)
def is_allowed(self, cell: str | float | int) -> bool:
- '''Check if cell is a flowat or *'''
+ """Check if cell is a float or *"""
return is_float(cell) or cell == '*'
def _expand(self, df: pd.DataFrame, col_id: str, field_vals: list, **kwargs) -> pd.DataFrame:
@@ -460,7 +482,6 @@ def _expand(self, df: pd.DataFrame, col_id: str, field_vals: list, **kwargs) ->
.merge(pd.DataFrame.from_dict({col_id: field_vals}), how='cross'),
]).astype({'period': 'float'})
-
def _select(self, df: pd.DataFrame, col_id: str, field_vals: list[int | float], **kwargs) -> pd.DataFrame:
# group by identifying columns and select periods/generate time series
# get list of groupable columns
@@ -495,7 +516,10 @@ def _select(self, df: pd.DataFrame, col_id: str, field_vals: list[int | float],
# check case
cond_match = req_rows[col_id].isin(periods_exist)
- cond_extrapolate = (req_rows[f"{col_id}_upper"].isna() | req_rows[f"{col_id}_lower"].isna())
+ cond_extrapolate = (
+ req_rows[f"{col_id}_upper"].isna()
+ | req_rows[f"{col_id}_lower"].isna()
+ )
# match
rows_match = req_rows.loc[cond_match] \
@@ -503,34 +527,54 @@ def _select(self, df: pd.DataFrame, col_id: str, field_vals: list[int | float],
# extrapolate
rows_extrapolate = (
- req_rows.loc[~cond_match & cond_extrapolate]
- .assign(
- period_combined=lambda x: np.where(
- x.notna()[f"{col_id}_upper"],
- x[f"{col_id}_upper"],
- x[f"{col_id}_lower"],
- )
- )
- .merge(rows.rename(columns={col_id: f"{col_id}_combined"}), on=f"{col_id}_combined")
- if 'extrapolate_period' not in kwargs or kwargs['extrapolate_period'] else
+ req_rows.loc[~cond_match & cond_extrapolate].assign(
+ period_combined=lambda x: np.where(
+ x.notna()[f"{col_id}_upper"],
+ x[f"{col_id}_upper"],
+ x[f"{col_id}_lower"],
+ ),
+ ).merge(
+ rows.rename(columns={col_id: f"{col_id}_combined"}),
+ on=f"{col_id}_combined",
+ )
+ if 'extrapolate_period' not in kwargs or
+ kwargs['extrapolate_period'] else
pd.DataFrame()
)
# interpolate
rows_interpolate = req_rows.loc[~cond_match & ~cond_extrapolate] \
- .merge(rows.rename(columns={c: f"{c}_upper" for c in rows.columns}), on=f"{col_id}_upper") \
- .merge(rows.rename(columns={c: f"{c}_lower" for c in rows.columns}), on=f"{col_id}_lower") \
- .assign(value=lambda row: row['value_lower'] + (row[f"{col_id}_upper"] - row[col_id]) /
- (row[f"{col_id}_upper"] - row[f"{col_id}_lower"]) * (row['value_upper'] - row['value_lower']))
+ .merge(
+ rows.rename(columns={c: f"{c}_upper" for c in rows.columns}),
+ on=f"{col_id}_upper",
+ ) \
+ .merge(
+ rows.rename(columns={c: f"{c}_lower" for c in rows.columns}),
+ on=f"{col_id}_lower",
+ ) \
+ .assign(
+ value=lambda row: row['value_lower']
+ + (row[f"{col_id}_upper"]
+ - row[col_id]) /
+ (row[f"{col_id}_upper"]
+ - row[f"{col_id}_lower"]) *
+ (row['value_upper']
+ - row['value_lower'])
+ )
# combine into one dataframe and drop unused columns
- rows_to_concat = [df for df in [rows_match, rows_extrapolate, rows_interpolate] if not df.empty]
+ rows_to_concat = [
+ df for df in [rows_match, rows_extrapolate, rows_interpolate]
+ if not df.empty
+ ]
if rows_to_concat:
rows_append = pd.concat(rows_to_concat)
rows_append.drop(columns=[
- c for c in [f"{col_id}_upper", f"{col_id}_lower", f"{col_id}_combined", 'value_upper', 'value_lower']
- if c in rows_append.columns
- ], inplace=True)
+ c for c in [f"{col_id}_upper", f"{col_id}_lower",
+ f"{col_id}_combined", 'value_upper',
+ 'value_lower']
+ if c in rows_append.columns
+ ], inplace=True)
# add to return list
ret.append(rows_append)
@@ -540,7 +584,7 @@ def _select(self, df: pd.DataFrame, col_id: str, field_vals: list[int | float],
class SourceFieldDefinition(AbstractFieldDefinition):
- '''
+ """
Class to store Source fields
Parameters
@@ -549,9 +593,9 @@ class SourceFieldDefinition(AbstractFieldDefinition):
Name of the field
description: str
Description of the field
- '''
+ """
def __init__(self, name: str, description: str):
- '''Initialize parent class'''
+ """Initialize parent class"""
super().__init__(
field_type='case',
name=name,
@@ -562,28 +606,36 @@ def __init__(self, name: str, description: str):
class CustomFieldDefinition(AbstractFieldDefinition):
- '''
+ """
Class to store Custom fields
Parameters
----------
**field_specs:
Specs of the custom fields
- '''
+ """
+
def __init__(self, **field_specs):
- '''Check if the field specs are of the required type and format,
- initialize parent class'''
- if not ('type' in field_specs and isinstance(field_specs['type'], str) and
+ """Check if the field specs are of the required type and format,
+ initialize parent class"""
+ if not ('type' in field_specs and
+ isinstance(field_specs['type'], str) and
field_specs['type'] in ['case', 'component']):
- raise Exception("Field type must be provided and equal to 'case' or 'component'.")
- if not ('name' in field_specs and isinstance(field_specs['name'], str)):
+ raise Exception("Field type must be provided and equal to 'case' "
+ "or 'component'.")
+ if not ('name' in field_specs and
+ isinstance(field_specs['name'], str)):
raise Exception('Field name must be provided and of type string.')
- if not ('description' in field_specs and isinstance(field_specs['description'], str)):
- raise Exception('Field description must be provided and of type string.')
- if not ('coded' in field_specs and isinstance(field_specs['coded'], bool)):
+ if not ('description' in field_specs and
+ isinstance(field_specs['description'], str)):
+ raise Exception('Field description must be provided and of type '
+ 'string.')
+ if not ('coded' in field_specs and
+ isinstance(field_specs['coded'], bool)):
raise Exception('Field coded must be provided and of type bool.')
if field_specs['coded'] and not ('codes' in field_specs and isinstance(field_specs['codes'], dict)):
- raise Exception('Field codes must be provided and contain a dict of possible codes.')
+ raise Exception('Field codes must be provided and contain a dict '
+ 'of possible codes.')
super().__init__(
field_type=field_specs['type'],
@@ -660,7 +712,7 @@ def __init__(self, **field_specs):
def read_fields(variable: str):
- '''
+ """
Read the fields of a variable
Parameters
@@ -674,13 +726,16 @@ def read_fields(variable: str):
Dictionary containing the fields
comments
Dictionary containing the comments
-
- '''
+ """
fields: dict[str, CustomFieldDefinition] = {}
comments: dict[str, CommentDefinition] = {}
for database_id in databases:
- fpath = databases[database_id] / 'fields' / ('/'.join(variable.split('|')) + '.yml')
+ fpath = (
+ databases[database_id] /
+ 'fields' /
+ ('/'.join(variable.split('|')) + '.yml')
+ )
if fpath.exists():
if not fpath.is_file():
raise Exception(f"Expected YAML file, but not a file: {fpath}")
@@ -694,11 +749,12 @@ def read_fields(variable: str):
required=False,
)
else:
- raise Exception(f"Unkown field type: {col_id}")
+ raise Exception(f"Unknown field type: {col_id}")
# make sure the field ID is not the same as for a base column
for col_id in fields:
if col_id in base_columns:
- raise Exception(f"Field ID cannot be equal to a base column ID: {col_id}")
+ raise Exception(f"Field ID cannot be equal to a base column ID: "
+ f"{col_id}")
return fields, comments
diff --git a/python/posted/config.py b/python/posted/config.py
index cd14904..450b9ca 100644
--- a/python/posted/config.py
+++ b/python/posted/config.py
@@ -8,14 +8,22 @@
techs = {}
for database_path in databases.values():
# read flow types
- flows |= read_csv_file(database_path / 'flow_types.csv').pivot(index='flow_id', columns='attribute', values='value').to_dict('index')
+ flows |= read_csv_file(database_path / 'flow_types.csv') \
+ .pivot(index='flow_id', columns='attribute', values='value') \
+ .to_dict('index')
# read technologies
- techs |= read_csv_file(database_path / 'tech_types.csv').set_index('tech_id').to_dict('index')
+ techs |= read_csv_file(database_path / 'tech_types.csv') \
+ .set_index('tech_id') \
+ .to_dict('index')
# loop over databases and read definitions
variables = {}
for database_path in databases.values():
# load variable definitions
- variables |= read_definitions(database_path / 'definitions' / 'variable', flows, techs)
+ variables |= read_definitions(
+ database_path / 'definitions' / 'variable',
+ flows,
+ techs,
+ )
diff --git a/python/posted/definitions.py b/python/posted/definitions.py
index 50f562f..a518d8a 100644
--- a/python/posted/definitions.py
+++ b/python/posted/definitions.py
@@ -8,28 +8,33 @@
def read_definitions(definitions_dir: Path, flows: dict, techs: dict):
- '''
- Reads YAML files from definitions directory, extracts tags, inserts tags into
- definitions, replaces tokens in definitions, and returns the updated definitions.
+ """
+ Reads YAML files from definitions directory, extracts tags, inserts
+ tags into definitions, replaces tokens in definitions, and returns
+ the updated definitions.
Parameters
----------
definitions_dir : Path
Path leading to the definitions
flows : dict
- Dictionary containng the different flow types. Each key represents a flow type, the corresponding
- value is a dictionary containing key value pairs of attributes like denisty, energycontent and their
- values.
+        Dictionary containing the different flow types. Each key
+        represents a flow type, the corresponding value is a dictionary
+        containing key-value pairs of attributes like density, energy
+        content, and their values.
techs : dict
- Dictionary containing information about different technologies. Each key in the
- dictionary represents a unique technology ID, and the corresponding value is a dictionary containing
- various specifications for that technology, like 'description', 'class', 'primary output' etc.
+ Dictionary containing information about different technologies.
+ Each key in the dictionary represents a unique technology ID,
+ and the corresponding value is a dictionary containing various
+ specifications for that technology, like 'description', 'class',
+ 'primary output' etc.
Returns
-------
dict
- Dictionary containing the definitions after processing and replacing tags and tokens
- '''
+ Dictionary containing the definitions after processing and
+ replacing tags and tokens
+ """
# check that variables exists and is a directory
if not definitions_dir.exists():
return {}
@@ -73,38 +78,47 @@ def read_definitions(definitions_dir: Path, flows: dict, techs: dict):
'default currency': lambda def_specs: default_currency,
'primary output': lambda def_specs: def_specs['primary_output'],
} | {
- f"default flow unit {unit_component}": unit_token_func(unit_component, flows)
+ f"default flow unit {unit_component}": unit_token_func(unit_component,
+ flows)
for unit_component in ('full', 'raw', 'variant')
}
for def_key, def_specs in definitions.items():
for def_property, def_value in def_specs.items():
for token_key, token_func in tokens.items():
- if isinstance(def_value, str) and f"{{{token_key}}}" in def_value:
- def_specs[def_property] = def_specs[def_property].replace(f"{{{token_key}}}", token_func(def_specs))
+ if (isinstance(def_value, str) and
+ f"{{{token_key}}}" in def_value):
+ def_specs[def_property] = (
+ def_specs[def_property].replace(f"{{{token_key}}}",
+ token_func(def_specs))
+ )
return definitions
def replace_tags(definitions: dict, tag: str, items: dict[str, dict]):
- '''
- Replaces specified tags in dictionary keys and values with corresponding
- items from another dictionary.
+ """
+ Replaces specified tags in dictionary keys and values with
+ corresponding items from another dictionary.
Parameters
----------
definitions : dict
- Dictionary containing the definitions, where the tags should be replaced by the items
+ Dictionary containing the definitions, where the tags should be
+ replaced by the items.
tag : str
- String to identify where replacements should be made in the definitions. Specifies
- the placeholder that needs to be replaced with actual values from the `items` dictionary.
+ String to identify where replacements should be made in the
+ definitions. Specifies the placeholder that needs to be
+ replaced with actual values from the `items` dictionary.
items : dict[str, dict]
- Dictionary containing the items from whith to replace the definitions
+        Dictionary containing the items with which to replace the
+        definitions.
Returns
-------
dict
- Dictionary containing the definitions with replacements based on the provided tag and items.
- '''
+ Dictionary containing the definitions with replacements based
+ on the provided tag and items.
+ """
definitions_with_replacements = {}
for def_name, def_specs in definitions.items():
@@ -112,46 +126,55 @@ def replace_tags(definitions: dict, tag: str, items: dict[str, dict]):
definitions_with_replacements[def_name] = def_specs
else:
for item_name, item_specs in items.items():
- item_desc = item_specs['description'] if 'description' in item_specs else item_name
+ item_desc = (
+ item_specs['description']
+ if 'description' in item_specs else
+ item_name
+ )
def_name_new = def_name.replace(f"{{{tag}}}", item_name)
def_specs_new = copy.deepcopy(def_specs)
def_specs_new |= item_specs
# replace tags in description
- def_specs_new['description'] = def_specs['description'].replace(f"{{{tag}}}", item_desc)
+ def_specs_new['description'] = def_specs['description'] \
+ .replace(f"{{{tag}}}", item_desc)
# replace tags in other specs
for k, v in def_specs_new.items():
if k == 'description' or not isinstance(v, str):
continue
- def_specs_new[k] = def_specs_new[k].replace(f"{{{tag}}}", item_name)
- def_specs_new[k] = def_specs_new[k].replace('{parent variable}', def_name[:def_name.find(f"{{{tag}}}")-1])
+ def_specs_new[k] = def_specs_new[k] \
+ .replace(f"{{{tag}}}", item_name)
+ def_specs_new[k] = def_specs_new[k] \
+ .replace('{parent variable}',
+ def_name[:def_name.find(f"{{{tag}}}")-1])
definitions_with_replacements[def_name_new] = def_specs_new
return definitions_with_replacements
-def unit_token_func(unit_component: Literal['full', 'raw', 'variant'], flows: dict):
- '''
- Takes a unit component type and a dictionary of flows, and returns a lambda function
- that extracts the default unit based on the specified component type from the flow
- dictionary.
+def unit_token_func(unit_component: Literal['full', 'raw', 'variant'],
+ flows: dict):
+ """
+ Takes a unit component type and a dictionary of flows, and returns
+ a lambda function that extracts the default unit based on the
+ specified component type from the flow dictionary.
Parameters
----------
unit_component : Literal['full', 'raw', 'variant']
Specifies the type of unit token to be returned.
flows : dict
- Dictionary containg the flows
-
+ Dictionary containing the flows
Returns
-------
lambda function
- lambda function that takes a dictionary `def_specs` as input. The lambda function
- will return different values based on the `unit_component` parameter and
- the contents of the `flows` dictionary.
- '''
+ A lambda function that takes a dictionary `def_specs` as
+ input. The lambda function will return different values
+ based on the `unit_component` parameter and the contents of
+ the `flows` dictionary.
+ """
return lambda def_specs: (
'ERROR'
if 'flow_id' not in def_specs or def_specs['flow_id'] not in flows else
@@ -160,7 +183,10 @@ def unit_token_func(unit_component: Literal['full', 'raw', 'variant'], flows: di
if unit_component == 'full' else
flows[def_specs['flow_id']]['default_unit'].split(';')[0]
if unit_component == 'raw' else
- ';'.join([''] + flows[def_specs['flow_id']]['default_unit'].split(';')[1:2])
+ ';'.join(
+ ['']
+ + flows[def_specs['flow_id']]['default_unit'].split(';')[1:2]
+ )
if unit_component == 'variant' else
'UNKNOWN'
)
diff --git a/python/posted/masking.py b/python/posted/masking.py
index 3817340..d076af5 100644
--- a/python/posted/masking.py
+++ b/python/posted/masking.py
@@ -11,24 +11,25 @@
def apply_cond(df: pd.DataFrame, cond: MaskCondition):
- '''Takes a pandas DataFrame and a condition, which can be a string, dictionary,
- or callable, and applies the condition to the DataFrame using `eval` or `apply`
- accordingly.
+ """
+ Takes a pandas DataFrame and a condition, which can be a string,
+ dictionary, or callable, and applies the condition to the DataFrame
+ using `eval` or `apply` accordingly.
Parameters
----------
df : pd.DataFrame
- A pandas DataFrame containing the data on which the condition will be applied.
+ A pandas DataFrame containing the data on which the condition
+ will be applied.
cond : MaskCondition
- The condition to be applied on the dataframe. Can be either a string, a dictionary, or a
- callable function.
+        The condition to be applied to the dataframe. Can be either a
+ string, a dictionary, or a callable function.
Returns
-------
pd.DataFrame
Dataframe evaluated at the mask condition
-
- '''
+ """
if isinstance(cond, str):
return df.eval(cond)
elif isinstance(cond, dict):
@@ -40,7 +41,9 @@ def apply_cond(df: pd.DataFrame, cond: MaskCondition):
class Mask:
- '''Class to define masks with conditions and weights to apply to DataFiles
+ """
+ Class to define masks with conditions and weights to apply to
+ DataFiles
Parameters
----------
@@ -54,30 +57,35 @@ class Mask:
comment: str, optional
Comment
- '''
+ """
def __init__(self,
where: MaskCondition | list[MaskCondition] = None,
use: MaskCondition | list[MaskCondition] = None,
weight: None | float | str | list[float | str] = None,
other: float = np.nan,
comment: str = ''):
- '''set fields from constructor arguments, perform consistency checks on fields,
- set default weight to 1 if not set otherwise'''
- self._where: list[MaskCondition] = [] if where is None else where if isinstance(where, list) else [where]
- self._use: list[MaskCondition] = [] if use is None else use if isinstance(use, list) else [use]
+ """Set fields from constructor arguments, perform consistency
+ checks on fields, set default weight to 1 if not set
+ otherwise"""
+ self._where: list[MaskCondition] = (
+ [] if where is None else where
+ if isinstance(where, list) else [where]
+ )
+ self._use: list[MaskCondition] = (
+ [] if use is None else use
+ if isinstance(use, list) else [use]
+ )
self._weight: list[float] = (
- None
- if weight is None else
- [float(w) for w in weight]
- if isinstance(weight, list) else
- [float(weight)]
+ None if weight is None else [float(w) for w in weight]
+ if isinstance(weight, list) else [float(weight)]
)
self._other: float = other
self._comment: str = comment
# perform consistency checks on fields
if self._use and self._weight and len(self._use) != len(self._weight):
- raise Exception(f"Must provide same length of 'use' conditions as 'weight' values.")
+ raise Exception(f"Must provide same length of 'use' conditions as "
+ f"'weight' values.")
# set default weight to 1 if not set otherwise
if not self._weight:
@@ -85,7 +93,8 @@ def __init__(self,
def matches(self, df: pd.DataFrame):
- '''Check if a mask matches a dataframe (all 'where' conditions match across all rows)
+ """
+ Check if a mask matches a dataframe (all 'where' conditions match across all rows)
Parameters
----------
@@ -94,7 +103,8 @@ def matches(self, df: pd.DataFrame):
Returns
-------
bool
- If the mask matches the dataframe'''
+ If the mask matches the dataframe
+ """
for w in self._where:
if not apply_cond(df, w).all():
return False
@@ -102,7 +112,8 @@ def matches(self, df: pd.DataFrame):
def get_weights(self, df: pd.DataFrame):
- '''Apply weights to the dataframe
+ """
+ Apply weights to the dataframe
Parameters
----------
@@ -112,7 +123,8 @@ def get_weights(self, df: pd.DataFrame):
Returns
-------
pd.DataFrame
- Dataframe with applied weights'''
+ Dataframe with applied weights
+ """
ret = pd.Series(index=df.index, data=self._other)
# apply weights where the use condition matches
@@ -123,7 +135,8 @@ def get_weights(self, df: pd.DataFrame):
def read_masks(variable: str):
- '''Reads YAML files containing mask specifications from multiple databases
+ """
+ Reads YAML files containing mask specifications from multiple databases
and returns a list of Mask objects.
Parameters
@@ -135,8 +148,7 @@ def read_masks(variable: str):
-------
list
List with masks for the variable
-
- '''
+ """
ret: list[Mask] = []
for database_id in databases:
diff --git a/python/posted/noslag.py b/python/posted/noslag.py
index 94d074f..61c66d7 100644
--- a/python/posted/noslag.py
+++ b/python/posted/noslag.py
@@ -7,9 +7,8 @@
import pandas as pd
from sigfig import round
-from posted.config import variables
-from posted.settings import default_periods
-from posted.columns import AbstractFieldDefinition, CustomFieldDefinition, read_fields, AbstractColumnDefinition, base_columns
+from posted.columns import AbstractFieldDefinition, CustomFieldDefinition, \
+ read_fields, AbstractColumnDefinition, base_columns
from posted.path import databases
from posted.masking import Mask, read_masks
from posted.tedf import TEBase, TEDF
@@ -17,9 +16,12 @@
# get list of TEDFs potentially containing variable
-def collect_files(parent_variable: str, include_databases: Optional[list[str]] = None):
- '''Takes a parent variable and optional list of databases to include,
- checks for their existence, and collects files and directories based on the parent variable.
+def collect_files(parent_variable: str,
+ include_databases: Optional[list[str]] = None):
+ """
+ Takes a parent variable and optional list of databases to include,
+ checks for their existence, and collects files and directories
+ based on the parent variable.
Parameters
----------
@@ -32,22 +34,24 @@ def collect_files(parent_variable: str, include_databases: Optional[list[str]] =
-------
list[tuple]
List of tuples containing the parent variable and the
- database ID for each file found in the specified directories.
-
- '''
+ database ID for each file found in the specified
+ directories.
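+
+    Examples
+    --------
+    Illustrative only; the variable and database names below are
+    assumptions:
+
+    >>> collect_files('Tech|Electrolysis')  # doctest: +SKIP
+    [('Tech|Electrolysis', 'public')]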
+ """
if not parent_variable:
        raise Exception('Variable may not be empty.')
# check that the requested database to include can be found
if include_databases is not None:
for database_id in include_databases:
- if not (database_id in databases and databases[database_id].exists()):
+ if not (database_id in databases and
+ databases[database_id].exists()):
raise Exception(f"Could not find database '{database_id}'.")
ret = []
for database_id, database_path in databases.items():
# skip ted paths not requested to include
- if include_databases is not None and database_id not in include_databases: continue
+        if (include_databases is not None and
+                database_id not in include_databases):
+            continue
# find top-level file and directory
top_path = '/'.join(parent_variable.split('|'))
@@ -61,7 +65,9 @@ def collect_files(parent_variable: str, include_databases: Optional[list[str]] =
# add all files contained in top-level directory
if top_directory.exists() and top_directory.is_dir():
for sub_file in top_directory.rglob('*.csv'):
- sub_variable = parent_variable + '|' + sub_file.relative_to(top_directory).name.rstrip('.csv')
+ child_variable = sub_file.relative_to(top_directory) \
+ .name.rstrip('.csv')
+ sub_variable = f"{parent_variable}|{child_variable}"
ret.append((sub_variable, database_id))
# loop over levels
@@ -78,33 +84,39 @@ def collect_files(parent_variable: str, include_databases: Optional[list[str]] =
return ret
-def normalise_units(df: pd.DataFrame, level: Literal['reported', 'reference'], var_units: dict[str, str],
- var_flow_ids: dict[str, str]):
- '''
+def normalise_units(
+ df: pd.DataFrame,
+ level: Literal['reported', 'reference'],
+ var_units: dict[str, str],
+ var_flow_ids: dict[str, str]):
+ """
Takes a DataFrame with reported or reference data, along with
- dictionaries mapping variable units and flow IDs, and normalizes the units of the variables in the
- DataFrame based on the provided mappings.
+ dictionaries mapping variable units and flow IDs, and normalizes
+ the units of the variables in the DataFrame based on the provided
+ mappings.
Parameters
----------
df : pd.DataFrame
Dataframe to be normalised
level : Literal['reported', 'reference']
- Specifies whether the data should be normalised on the reported or reference values
+ Specifies whether the data should be normalised on the reported
+ or reference values
var_units : dict[str, str]
- Dictionary that maps a combination of parent variable and variable
- to its corresponding unit. The keys in the dictionary are in the format "{parent_variable}|{variable}",
- and the values are the units associated with that variable.
+ Dictionary that maps a combination of parent variable and
+ variable to its corresponding unit. The keys in the dictionary
+ are in the format "{parent_variable}|{variable}", and the values
+ are the units associated with that variable.
var_flow_ids : dict[str, str]
- Dictionary that maps a combination of parent variable and variable to a
- specific flow ID. This flow ID is used for unit conversion in the `normalise_units` function.
+ Dictionary that maps a combination of parent variable and
+ variable to a specific flow ID. This flow ID is used for unit
+ conversion in the `normalise_units` function.
Returns
-------
pd.DataFrame
Normalised dataframe
-
- '''
+ """
prefix = '' if level == 'reported' else 'reference_'
var_col_id = prefix + 'variable'
@@ -140,17 +152,19 @@ def normalise_units(df: pd.DataFrame, level: Literal['reported', 'reference'], v
if level == 'reported':
df_tmp['uncertainty'] *= conv_factor
- # Uupdate unit columns
+ # Update unit columns
df_tmp[unit_col_id] = df_tmp['target_unit']
- # Drop unneccessary columns and return
+ # Drop unnecessary columns and return
return df_tmp.drop(columns=['target_unit', 'target_flow_id'])
def normalise_values(df: pd.DataFrame):
- '''Takes a DataFrame as input, normalizes the 'value' and 'uncertainty'
- columns by the reference value, and updates the 'reference_value' column accordingly.
+ """
+ Takes a DataFrame as input, normalizes the 'value' and
+ 'uncertainty' columns by the reference value, and updates the
+ 'reference_value' column accordingly.
Parameters
----------
@@ -160,12 +174,14 @@ def normalise_values(df: pd.DataFrame):
Returns
-------
pd.DataFrame
- Returns a modified DataFrame where the 'value' column has been
- divided by the 'reference_value' column (or 1.0 if 'reference_value' is null), the 'uncertainty'
- column has been divided by the 'reference_value' column, and the 'reference_value' column has been
- replaced with 1.0 if it was not null, otherwise
+ Returns a modified DataFrame where the 'value' column has
+ been divided by the 'reference_value' column (or 1.0 if
+ 'reference_value' is null), the 'uncertainty' column has
+ been divided by the 'reference_value' column, and the
+ 'reference_value' column has been replaced with 1.0 if it
+ was not null.
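+
+    For illustration: a row with value=10, uncertainty=2 and
+    reference_value=2 becomes value=5, uncertainty=1 and
+    reference_value=1.0, while rows without a reference value keep
+    their value and get a reference value of nan.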
+ """
- '''
# Calculate reference value
reference_value = df.apply(
lambda row:
@@ -174,6 +190,7 @@ def normalise_values(df: pd.DataFrame):
1.0,
axis=1,
)
+
# Calculate new value, reference value and uncertainty
value_new = df['value'] / reference_value
uncertainty_new = df['uncertainty'] / reference_value
@@ -184,12 +201,18 @@ def normalise_values(df: pd.DataFrame):
np.nan,
axis=1,
)
+
# Assign new values to dataframe and return
- return df.assign(value=value_new, uncertainty=uncertainty_new, reference_value=reference_value_new)
+ return df.assign(
+ value=value_new,
+ uncertainty=uncertainty_new,
+ reference_value=reference_value_new,
+ )
class HarmoniseMappingFailure(Warning):
- """Warning raised for rows in TEDataSets where mappings fail.
+ """
+ Warning raised for rows in TEDataSets where mappings fail.
Parameters
----------
@@ -205,8 +228,10 @@ class HarmoniseMappingFailure(Warning):
message
explanation of the error
"""
- def __init__(self, row_data: pd.DataFrame, message: str = "Failure when selecting from dataset."):
- '''Save constructor arguments as public fields, compose warning message, call super constructor'''
+ def __init__(self,
+ row_data: pd.DataFrame,
+ message: str = "Failure when selecting from dataset."):
+ """Create failure warning and attach row data."""
# save constructor arguments as public fields
self.row_data: pd.DataFrame = row_data
self.message: str = message
@@ -218,9 +243,9 @@ def __init__(self, row_data: pd.DataFrame, message: str = "Failure when selectin
super().__init__(warning_message)
-
def combine_units(numerator: str, denominator: str):
- '''Combine fraction of two units into updated unit string
+ """
+ Combine fraction of two units into updated unit string
Parameters
----------
@@ -233,11 +258,10 @@ def combine_units(numerator: str, denominator: str):
-------
str
updated unit string after simplification
- '''
-
-
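+
+    Examples
+    --------
+    Illustrative only; the exact string depends on how the unit
+    registry simplifies the quotient. A call such as
+
+    >>> combine_units('kW', 'kg/h')  # doctest: +SKIP
+
+    would typically simplify to a unit equivalent to kilowatt hours
+    per kilogram, while a dimensionless quotient such as 'kWh' over
+    'MWh' is returned as the explicit quotient.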
+ """
ret = ureg(f"{numerator}/({denominator})").u
- # chekc if ret is dimensionless, if not return ret, else return the explicit quotient
+ # Check if ret is dimensionless, if not return ret, else return the
+ # explicit quotient
if not ret.dimensionless:
return str(ret)
else:
@@ -247,8 +271,10 @@ def combine_units(numerator: str, denominator: str):
class DataSet(TEBase):
- '''Class to store, normalise, select and aggregate DataSets
- Parameters
+ """
+ Class to store, normalise, select and aggregate DataSets
+
+ Parameters
----------
parent_variable: str
Variable to collect Data on
@@ -257,12 +283,10 @@ class DataSet(TEBase):
file_paths: Optional[list[path]], optional
Paths to load data from
check_inconsistencies: bool, optional
- Wether to check for inconsistencies
+ Whether to check for inconsistencies
data: Optional[pd.DataFrame], optional
Specific data to include in the dataset
-
-
- '''
+ """
_df: None | pd.DataFrame
_columns: dict[str, AbstractColumnDefinition]
_fields: dict[str, AbstractFieldDefinition]
@@ -276,13 +300,11 @@ def __init__(self,
check_inconsistencies: bool = False,
data: Optional[pd.DataFrame] = None,
):
- '''Initialise parent class and fields, load data from specified databases and files
-
-
- '''
+ """Initialise parent class and fields, load data from specified
+ databases and files"""
TEBase.__init__(self, parent_variable)
- # initialise fields
+ # Initialise fields
self._df = None
self._columns = base_columns
self._fields = {
@@ -297,34 +319,52 @@ def __init__(self,
self._df = data
else:
# read TEDataFiles and combine into dataset
- include_databases = list(include_databases) if include_databases is not None else list(databases.keys())
- self._df = self._load_files(include_databases, file_paths or [], check_inconsistencies)
-
+ include_databases = (
+ list(include_databases)
+ if include_databases is not None else
+ list(databases.keys())
+ )
+ self._df = self._load_files(
+ include_databases=include_databases,
+ file_paths=file_paths or [],
+ check_inconsistencies=check_inconsistencies,
+ )
@property
def data(self):
- '''str: Get or set dataframe'''
+ """str: Get or set dataframe"""
return self._df
def set_data(self, df: pd.DataFrame):
self._df = df
-
- def _load_files(self, include_databases: list[str], file_paths: list[Path], check_inconsistencies: bool):
+ def _load_files(self,
+ include_databases: list[str],
+ file_paths: list[Path],
+ check_inconsistencies: bool):
# Load TEDFs and compile into NSHADataSet
-
files: list[TEDF] = []
# collect TEDF and append to list
- collected_files = collect_files(parent_variable=self._parent_variable, include_databases=include_databases)
+ collected_files = collect_files(
+ parent_variable=self._parent_variable,
+ include_databases=include_databases,
+ )
for file_variable, file_database_id in collected_files:
- files.append(TEDF(parent_variable=file_variable, database_id=file_database_id))
+ files.append(TEDF(
+ parent_variable=file_variable,
+ database_id=file_database_id,
+ ))
for file_path in file_paths:
- files.append(TEDF(parent_variable=self._parent_variable, file_path=file_path))
+ files.append(TEDF(
+ parent_variable=self._parent_variable,
+ file_path=file_path,
+ ))
# raise exception if no TEDF can be loaded
if not files:
- raise Exception(f"No TEDF to load for variable '{self._parent_variable}'.")
+ raise Exception(f"No TEDF to load for variable "
+ f"'{self._parent_variable}'.")
# get fields and masks from databases
files_vars: set[str] = {f.parent_variable for f in files}
@@ -332,12 +372,14 @@ def _load_files(self, include_databases: list[str], file_paths: list[Path], chec
new_fields, new_comments = read_fields(v)
for col_id in new_fields | new_comments:
if col_id in self._columns:
- raise Exception(f"Cannot load TEDFs due to multiple columns with same ID defined: {col_id}")
+ raise Exception(f"Cannot load TEDFs due to multiple "
+ f"columns with same ID defined: {col_id}")
self._fields = new_fields | self._fields
self._columns = new_fields | self._columns | new_comments
self._masks += read_masks(v)
- # load all TEDFs: load from file, check for inconsistencies (if requested), expand cases and variables
+ # load all TEDFs: load from file, check for inconsistencies (if
+ # requested), expand cases and variables
file_dfs: list[pd.DataFrame] = []
for f in files:
# load
@@ -354,7 +396,8 @@ def _load_files(self, include_databases: list[str], file_paths: list[Path], chec
# append to dataframe list
file_dfs.append(df_tmp)
- # compile dataset from the dataframes loaded from the individual files
+ # compile dataset from the dataframes loaded from the
+ # individual files
data = pd.concat(file_dfs)
# query relevant variables
@@ -371,22 +414,25 @@ def _load_files(self, include_databases: list[str], file_paths: list[Path], chec
# return
return data
-
- def normalise(self, override: Optional[dict[str, str]] = None, inplace: bool = False) -> pd.DataFrame | None:
- '''
- normalise data: default reference units, reference value equal to 1.0, default reported units
+ def normalise(self,
+ override: Optional[dict[str, str]] = None,
+ inplace: bool = False) -> pd.DataFrame | None:
+ """
+ Normalise data: default reference units, reference value equal
+ to 1.0, default reported units
Parameters
----------
override: Optional[dict[str,str]], optional
Dictionary with key, value pairs of variables to override
inplace: bool, optional
- Wether to do the normalisation in place
+ Whether to do the normalisation in place
Returns
-------
pd.DataFrame
- if inplace is false, returns normalised dataframe'''
+ If inplace is false, returns normalised dataframe
+ """
normalised, _ = self._normalise(override)
if inplace:
self._df = normalised
@@ -394,13 +440,16 @@ def normalise(self, override: Optional[dict[str, str]] = None, inplace: bool = F
else:
return normalised
- def _normalise(self, override: Optional[dict[str, str]]) -> tuple[pd.DataFrame, dict[str, str]]:
+ def _normalise(self, override: Optional[dict[str, str]]) \
+ -> tuple[pd.DataFrame, dict[str, str]]:
if override is None:
override = {}
# get overridden var specs
var_flow_ids = {
- var_name: var_specs['flow_id'] if 'flow_id' in var_specs else np.nan
+ var_name: (var_specs['flow_id']
+ if 'flow_id' in var_specs else
+ np.nan)
for var_name, var_specs in self._var_specs.items()
}
var_units = {
@@ -410,9 +459,11 @@ def _normalise(self, override: Optional[dict[str, str]]) -> tuple[pd.DataFrame,
# normalise reference units, normalise reference values, and normalise reported units
normalised = self._df \
- .pipe(normalise_units, level='reference', var_units=var_units, var_flow_ids=var_flow_ids) \
+ .pipe(normalise_units, level='reference',
+ var_units=var_units, var_flow_ids=var_flow_ids) \
.pipe(normalise_values) \
- .pipe(normalise_units, level='reported', var_units=var_units, var_flow_ids=var_flow_ids)
+ .pipe(normalise_units, level='reported',
+ var_units=var_units, var_flow_ids=var_flow_ids)
# return normalised data and variable units
return normalised, var_units
@@ -423,7 +474,8 @@ def select(self,
drop_singular_fields: bool = True,
extrapolate_period: bool = True,
**field_vals_select) -> pd.DataFrame:
- '''Select desired data from the dataframe
+ """
+ Select desired data from the dataframe
Parameters
----------
@@ -440,7 +492,7 @@ def select(self,
-------
pd.DataFrame
DataFrame with selected Values
- '''
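+
+        Examples
+        --------
+        Illustrative only; the variable, period, and source values are
+        assumptions:
+
+        >>> DataSet('Tech|Electrolysis').select(
+        ...     period=2030, source='Vartiainen22')  # doctest: +SKIP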
+ """
selected, var_units, var_references = self._select(
override,
drop_singular_fields,
@@ -461,9 +513,13 @@ def _select(self,
selected = normalised
# drop unit columns and reference value column
- selected.drop(columns=['unit', 'reference_unit', 'reference_value'], inplace=True)
+ selected.drop(
+ columns=['unit', 'reference_unit', 'reference_value'],
+ inplace=True,
+ )
- # drop columns containing comments and uncertainty field (which is currently unsupported)
+ # drop columns containing comments and uncertainty field (which
+ # is currently unsupported)
selected.drop(
columns=['uncertainty'] + [
col_id for col_id, field in self._columns.items()
@@ -475,28 +531,50 @@ def _select(self,
# add parent variable as prefix to other variable columns
selected['variable'] = selected['parent_variable'] + '|' + selected['variable']
selected['reference_variable'] = selected['parent_variable'] + '|' + selected['reference_variable']
- selected.drop(columns=['parent_variable'], inplace=True)
+ selected.drop(
+ columns=['parent_variable'],
+ inplace=True,
+ )
# raise exception if fields listed in arguments that are unknown
for field_id in field_vals_select:
if not any(field_id == col_id for col_id in self._fields):
- raise Exception(f"Field '{field_id}' does not exist and cannot be used for selection.")
+ raise Exception(f"Field '{field_id}' does not exist and "
+ f"cannot be used for selection.")
# order fields for selection: period must be expanded last due to the interpolation
- fields_select = ({col_id: self._fields[col_id] for col_id in field_vals_select} |
- {col_id: field for col_id, field in self._fields.items() if col_id != 'period' and col_id not in field_vals_select} |
- {'period': self._fields['period']})
+ fields_select = ({
+ col_id: self._fields[col_id]
+ for col_id in field_vals_select
+ } | {
+ col_id: field
+ for col_id, field in self._fields.items()
+ if col_id != 'period' and col_id not in field_vals_select
+ } | {
+ 'period': self._fields['period']
+ })
# select and expand fields
for col_id, field in fields_select.items():
- field_vals = field_vals_select[col_id] if col_id in field_vals_select else None
- selected = field.select_and_expand(selected, col_id, field_vals, extrapolate_period=extrapolate_period)
+ field_vals = (
+ field_vals_select[col_id]
+ if col_id in field_vals_select else
+ None
+ )
+ selected = field.select_and_expand(
+ selected,
+ col_id,
+ field_vals,
+ extrapolate_period=extrapolate_period,
+ )
- # drop custom fields with only one value if specified in method argument
+ # drop custom fields with only one value if specified in method
+ # argument
if drop_singular_fields:
selected.drop(columns=[
col_id for col_id, field in self._fields.items()
- if isinstance(field, CustomFieldDefinition) and selected[col_id].nunique() < 2
+ if isinstance(field, CustomFieldDefinition) and
+ selected[col_id].nunique() < 2
], inplace=True)
# apply mappings
@@ -513,7 +591,8 @@ def _select(self,
# Check for multiple reference variables per reported variable
if not var_references.index.is_unique:
- raise Exception(f"Multiple reference variables per reported variable found: {var_references}")
+ raise Exception(f"Multiple reference variables per reported "
+ f"variable found: {var_references}")
var_references = var_references.to_dict()
        # Remove 'reference_variable' column
@@ -546,7 +625,8 @@ def _apply_mappings(self, expanded: pd.DataFrame, var_units: dict) -> pd.DataFra
# loop over groups
for keys, ids in grouped.groups.items():
# get rows in group
- rows = expanded.loc[ids, [c for c in expanded if c not in group_cols]].copy()
+ cs = [c for c in expanded if c not in group_cols]
+ rows = expanded.loc[ids, cs].copy()
# 1. convert FLH to OCF
cond = rows['variable'].str.endswith('|FLH')
@@ -728,7 +808,8 @@ def aggregate(self, override: Optional[dict[str, str]] = None,
masks: Optional[list[Mask]] = None,
masks_database: bool = True,
**field_vals_select) -> pd.DataFrame:
- '''Aggregates data based on specified parameters, applies masks,
+ """
+ Aggregates data based on specified parameters, applies masks,
and cleans up the resulting DataFrame.
Parameters
@@ -738,38 +819,45 @@ def aggregate(self, override: Optional[dict[str, str]] = None,
drop_singular_fields: bool, optional
If True, drop custom fields with only one value
extrapolate_period: bool, optional
- If True, extrapolate values by extrapolation, if no value for this period is given
+            If True, extrapolate values if no value is given for a
+            period
agg : Optional[str | list[str] | tuple[str]]
Specifies which fields to aggregate over.
masks : Optional[list[Mask]]
- Specifies a list of Mask objects that will be applied to the data during aggregation.
- These masks can be used to filter or weight the
- data based on certain conditions defined in the Mask objects.
+ Specifies a list of Mask objects that will be applied to the
+ data during aggregation. These masks can be used to filter
+ or weight the data based on certain conditions defined in
+ the Mask objects.
masks_database : bool, optional
- Determines whether to include masks from databases in the aggregation process.
- If set to `True`, masks from databases will be included along with any masks provided as function arguments.
- If set to `False`, only the masks provided as function argruments will be applied
+ Determines whether to include masks from databases in the
+ aggregation process. If set to `True`, masks from databases
+ will be included along with any masks provided as function
+ arguments. If set to `False`, only the masks provided as
+ function arguments will be applied.
Returns
-------
pd.DataFrame
- The `aggregate` method returns a pandas DataFrame that has been cleaned up and aggregated based
- on the specified parameters and input data. The method performs aggregation over component
- fields and cases fields, applies weights based on masks, drops rows with NaN weights, aggregates
- with weights, inserts reference variables, sorts columns and rows, rounds values, and inserts
- units before returning the final cleaned and aggregated DataFrame.
-
- '''
+ The `aggregate` method returns a pandas DataFrame that has
+ been cleaned up and aggregated based on the specified
+ parameters and input data. The method performs aggregation
+ over component fields and cases fields, applies weights
+ based on masks, drops rows with NaN weights, aggregates with
+ weights, inserts reference variables, sorts columns and
+ rows, rounds values, and inserts units before returning the
+ final cleaned and aggregated DataFrame.
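+
+        Examples
+        --------
+        Illustrative only; the variable, period, and field values are
+        assumptions:
+
+        >>> DataSet('Tech|Electrolysis').aggregate(
+        ...     period=2030, agg=['source', 'size'])  # doctest: +SKIP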
+ """
# get selection
- selected, var_units, var_references = self._select(override,
- extrapolate_period,
- drop_singular_fields,
- **field_vals_select)
+ selected, var_units, var_references = self._select(
+ override, extrapolate_period,
+ drop_singular_fields, **field_vals_select
+ )
# compile masks from databases and function argument into one list
if masks is not None and any(not isinstance(m, Mask) for m in masks):
- raise Exception("Function argument 'masks' must contain a list of posted.masking.Mask objects.")
+ raise Exception("Function argument 'masks' must contain a list of "
+ "posted.masking.Mask objects.")
masks = (self._masks if masks_database else []) + (masks or [])
# aggregation
@@ -786,9 +874,11 @@ def aggregate(self, override: Optional[dict[str, str]] = None,
agg = [agg]
for a in agg:
if not isinstance(a, str):
- raise Exception(f"Field ID in argument 'agg' must be a string but found: {a}")
+ raise Exception(f"Field ID in argument 'agg' must be a "
+ f"string but found: {a}")
if not any(a == col_id for col_id in self._fields):
- raise Exception(f"Field ID in argument 'agg' is not a valid field: {a}")
+ raise Exception(f"Field ID in argument 'agg' is not a "
+ f"valid field: {a}")
# aggregate over component fields
group_cols = [
@@ -823,7 +913,10 @@ def aggregate(self, override: Optional[dict[str, str]] = None,
out = rows \
.groupby(group_cols, dropna=False)[['value', 'weight']] \
.apply(lambda cols: pd.Series({
- 'value': np.average(cols['value'], weights=cols['weight']),
+ 'value': np.average(
+ cols['value'],
+ weights=cols['weight'],
+ ),
}))
# add to return list
@@ -843,14 +936,19 @@ def aggregate(self, override: Optional[dict[str, str]] = None,
'value': [1.0],
} | {
col_id: ['*']
- for col_id, field in self._fields.items() if col_id in aggregated
+ for col_id, field in self._fields.items()
+ if col_id in aggregated
}))
if agg_append:
agg_append = pd.concat(agg_append).reset_index(drop=True)
for col_id, field in self._fields.items():
if col_id not in aggregated:
continue
- agg_append = field.select_and_expand(agg_append, col_id, aggregated[col_id].unique().tolist())
+ agg_append = field.select_and_expand(
+ agg_append,
+ col_id,
+ aggregated[col_id].unique().tolist(),
+ )
else:
agg_append = None
@@ -858,28 +956,39 @@ def aggregate(self, override: Optional[dict[str, str]] = None,
return self._cleanup(pd.concat([aggregated, agg_append]), var_units)
# clean up: sort columns and rows, round values, insert units
- def _cleanup(self, df: pd.DataFrame, var_units: dict[str, str]) -> pd.DataFrame:
+ def _cleanup(self, df: pd.DataFrame, var_units: dict[str, str]) \
+ -> pd.DataFrame:
# sort columns and rows
- cols_sorted = (
- [col_id for col_id, field in self._fields.items() if isinstance(field, CustomFieldDefinition)] +
- ['source', 'variable', 'reference_variable', 'region', 'period', 'value']
- )
+ cols_sorted = ([
+ col_id for col_id, field in self._fields.items()
+ if isinstance(field, CustomFieldDefinition)
+ ] + [
+ 'source', 'variable', 'reference_variable',
+ 'region', 'period', 'value',
+ ])
cols_sorted = [c for c in cols_sorted if c in df.columns]
df = df[cols_sorted]
- df = df \
- .sort_values(by=[c for c in cols_sorted if c in df and c != 'value']) \
+ df = df.sort_values(by=[
+ c for c in cols_sorted
+ if c in df and c != 'value'
+ ]) \
.reset_index(drop=True)
- # round values
+ # Round values
df['value'] = df['value'].apply(
- lambda cell: cell if pd.isnull(cell) else round(cell, sigfigs=4, warn=False)
+ lambda cell: (
+ cell
+ if pd.isnull(cell) else
+ round(cell, sigfigs=4, warn=False)
+ )
)
- # insert column containing units
+ # Insert column containing units
df.insert(df.columns.tolist().index('value'), 'unit', np.nan)
if 'reference_variable' in df:
df['unit'] = df.apply(
- lambda row: combine_units(var_units[row['variable']], var_units[row['reference_variable']])
+ lambda row: combine_units(var_units[row['variable']],
+ var_units[row['reference_variable']])
if not pd.isnull(row['reference_variable']) else
var_units[row['variable']],
axis=1,
diff --git a/python/posted/sources.py b/python/posted/sources.py
index 1c6b9a6..5577faa 100755
--- a/python/posted/sources.py
+++ b/python/posted/sources.py
@@ -1,4 +1,5 @@
from pathlib import Path
+from typing import Optional
import pandas as pd
from pybtex.database.input import bibtex
@@ -7,33 +8,40 @@
from posted.path import databases
-def format_sources(bib_data, style, form, exclude_fields = None):
- '''
+def format_sources(
+ bib_data,
+ style,
+ form,
+ exclude_fields: Optional[list] = None):
+ """
Takes bibliographic data, a citation style, a citation form, and
- optional excluded fields, and returns a formatted list of sources based on the specified style and
- form.
+ optional excluded fields, and returns a formatted list of sources
+ based on the specified style and form.
Parameters
----------
bib_data
- Contains bibliographic information, such as author, title, references or citations.
+ Contains bibliographic information, such as author, title,
+ references or citations.
style
Specifies the formatting style for the bibliography entries.
form
- Specifies the format in which the citation should be rendered. It determines how the citation information will be displayed or
+ Specifies the format in which the citation should be rendered.
+ It determines how the citation information will be displayed or
structured in the final output.
exclude_fields
- Specifies a list of fields that should be excluded from the final output. These fields will be removed from the entries before
- formatting and returning the citation data.
+ Specifies a list of fields that should be excluded from the
+ final output. These fields will be removed from the entries
+ before formatting and returning the citation data.
Returns
-------
list[dict]
- A list of dictionaries containing the identifier, citation, DOI, and URL information for each entry
- in the bibliography data, formatted according to the specified style and form, with any excluded
- fields removed.
-
- '''
+ A list of dictionaries containing the identifier, citation,
+ DOI, and URL information for each entry in the bibliography
+ data, formatted according to the specified style and form,
+ with any excluded fields removed.
+ """
exclude_fields = exclude_fields or []
if exclude_fields:
@@ -56,19 +64,19 @@ def format_sources(bib_data, style, form, exclude_fields = None):
return ret
-
def dump_sources(file_path: str | Path):
- '''Parses BibTeX files, formats the data, and exports it into a CSV or Excel
- file using pandas.
+ """
+ Parses BibTeX files, formats the data, and exports it into a CSV
+ or Excel file using pandas.
Parameters
----------
file_path : str | Path
- Path to the file where the formatted sources should be exported to.
- It can be either a string representing the file path or a `Path` object
- from the `pathlib` module.
+ Path to the file where the formatted sources should be exported
+ to. It can be either a string representing the file path or a
+ `Path` object from the `pathlib` module.
- '''
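+
+    Examples
+    --------
+    Illustrative only:
+
+    >>> dump_sources('sources.csv')  # doctest: +SKIP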
+ """
# convert string to pathlib.Path if necessary
if isinstance(file_path, str):
file_path = Path(file_path)
diff --git a/python/posted/team.py b/python/posted/team.py
index e826ee6..738d7ac 100644
--- a/python/posted/team.py
+++ b/python/posted/team.py
@@ -51,14 +51,20 @@ class AbstractManipulation:
def perform(self, df: pd.DataFrame) -> pd.DataFrame:
pass
- def _varsplit(self, df: pd.DataFrame, cmd: Optional[str] = None, regex: Optional[str] = None) -> pd.DataFrame:
- # check that precisely one of the two arguments (cmd and regex) is provided
+ def _varsplit(self,
+ df: pd.DataFrame,
+ cmd: Optional[str] = None,
+ regex: Optional[str] = None) -> pd.DataFrame:
+ # Check that precisely one of the two arguments (cmd and regex)
+ # is provided.
if cmd is not None and regex is not None:
- raise Exception('Only one of the two arguments may be provided: cmd or regex.')
+ raise Exception('Only one of the two arguments may be provided: '
+ 'cmd or regex.')
if cmd is None and regex is None:
- raise Exception('Either a command or a regex string must be provided.')
+ raise Exception('Either a command or a regex string must be '
+ 'provided.')
- # determine regex from cmd if necessary
+ # Determine regex from cmd if necessary.
if regex is None:
regex = '^' + r'\|'.join([
rf'(?P<{t[1:]}>[^|]*)' if t[0] == '?' else
@@ -67,6 +73,7 @@ def _varsplit(self, df: pd.DataFrame, cmd: Optional[str] = None, regex: Optional
for t in cmd.split('|')
]) + '$'
+ # Extract new columns from existing.
cols_extracted = df.columns.str.extract(regex)
df_new = df[df.columns[cols_extracted.notnull().all(axis=1)]]
df_new.columns = (
@@ -74,6 +81,8 @@ def _varsplit(self, df: pd.DataFrame, cmd: Optional[str] = None, regex: Optional
if len(cols_extracted.columns) > 1 else
cols_extracted.dropna().iloc[:, 0]
)
+
+ # Return new dataframe.
return df_new
@@ -81,85 +90,153 @@ def _varsplit(self, df: pd.DataFrame, cmd: Optional[str] = None, regex: Optional
@pd.api.extensions.register_dataframe_accessor('team')
class TEAMAccessor:
def __init__(self, df: pd.DataFrame):
- # check that column axis has only one level
+ # Check that column axis has only one level.
if df.columns.nlevels > 1:
- raise ValueError('Can only use .team accessor with team-like dataframes that contain only one column '
- 'layer.')
+ raise ValueError('Can only use .team accessor with team-like '
+ 'dataframes that contain only one column layer.')
- # check that at least variable, unit, and value are among the columns
+ # Check that at least variable, unit, and value are among the columns.
if not all(c in df for c in ('variable', 'unit', 'value')):
- raise ValueError('Can only use .team accessor with team-like dataframes that contain at least the '
- 'variable, unit, and value columns.')
+ raise ValueError('Can only use .team accessor with team-like '
+ 'dataframes that contain at least the variable, '
+ 'unit, and value columns.')
- # warn if 'unfielded' column exists
+ # Warn if 'unfielded' column exists.
if 'unfielded' in df.columns:
- warnings.warn("Having a column named 'unfielded' in the dataframe may result in unexpected behaviour.")
+ warnings.warn("Having a column named 'unfielded' in the dataframe "
+ "may result in unexpected behaviour.")
- # store arguments
+ # Store arguments as member fields.
self._df = df
- self._fields = [c for c in self._df if c not in ('variable', 'unit', 'value')]
+ self._fields = [
+ c for c in self._df
+ if c not in ('variable', 'unit', 'value')
+ ]
@property
def fields(self):
return self._fields
- # explode rows with nan entries
- def explode(self, fields: Optional[str | list[str]] = None) -> pd.DataFrame:
+ def explode(self,
+ fields: Optional[str | list[str]] = None) -> pd.DataFrame:
+ """
+ Explode rows with nan entries.
+
+ Parameters
+ ----------
+ fields : str | list[str] | None
+ The list of fields to explode.
+
+ Returns
+ -------
+ pd.DataFrame
+ The dataframe with nan entries in the respective fields
+ exploded.
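+
+        For illustration: if the 'region' field of one row is nan
+        while other rows contain 'DEU' and 'USA', that row is
+        duplicated into one row per known region value.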
+ """
df = self._df
- fields = self._fields if fields is None else [fields] if isinstance(fields, str) else fields
+ fields = (
+ self._fields if fields is None else [fields]
+ if isinstance(fields, str) else fields
+ )
for field in fields:
df = df \
.assign(**{field: lambda df: df[field].apply(
- lambda cell: df[field].dropna().unique().tolist() if pd.isnull(cell) else cell
+ lambda cell: df[field].dropna().unique().tolist()
+ if pd.isnull(cell) else cell
)}) \
.explode(field)
return df.reset_index(drop=True)
- # for grouping rows by fields (region, period, other...), including an `explode` statement for `nan` entries
def groupby_fields(self, **kwargs) -> DataFrameGroupBy:
+ """
+ Group by field columns (region, period, other...). Fields with
+ rows that contain nan entries will be 'exploded' first.
+
+ Parameters
+ ----------
+ kwargs
+ Passed on to pd.DataFrame.groupby.
+
+ Returns
+ -------
+ pd.DataFrameGroupBy
+ The grouped dataframe rows.
+ """
if 'by' in kwargs:
- raise Exception("The 'by' argument is determined by team, you cannot provide it manually.")
+ raise Exception("The 'by' argument is determined by team, you "
+ "cannot provide it manually.")
return self.explode().groupby(by=self._fields, **kwargs)
- # pivot posted-formatted dataframe from long to wide (variables as columns)
def pivot_wide(self):
- # explode
+ """
+ Pivot dataframe wide, such that column names are variables.
+
+ Returns
+ -------
+ pd.DataFrame
+ The original dataframe in pivot mode.
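+
+        For illustration: after the pivot, each variable becomes one
+        column with its unit attached as a pint dtype, indexed by the
+        field columns (e.g. region and period).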
+ """
ret = self.explode()
- # check units are harmonised across variables before pivot
+ # Check units are harmonised across variables before pivot.
units = ret[['variable', 'unit']].drop_duplicates()
if not units['variable'].is_unique:
- duplicate_units = units.loc[units['variable'].duplicated()]['variable'].tolist()
- raise Exception(f"Cannot pivot wide on a dataframe where variables have multiple units: "
+ duplicate_units = units.loc[units['variable'].duplicated()] \
+ .loc[:, 'variable'] \
+ .tolist()
+ raise Exception(f"Cannot pivot wide on a dataframe where "
+ f"variables have multiple units: "
f"{', '.join(duplicate_units)}")
- # create dummy field if non exists
+        # Create dummy field if none exists.
if not self._fields:
ret = ret.assign(unfielded=0)
fields = self._fields + ['unfielded']
else:
fields = self._fields
- # pivot dataframe
+ # Pivot dataframe.
ret = ret.pivot(
index=fields,
columns=['variable', 'unit'],
values='value',
)
- # check unit exists for all columns
+ # Check unit exists for all columns.
if ret.columns.get_level_values(level='unit').isna().any():
- raise Exception('Unit column may not contain NaN entries. Please use "dimensionless" or "No Unit" if the '
- 'variable has no unit.')
+ raise Exception('Unit column may not contain NaN entries. Please '
+ 'use "dimensionless" or "No Unit" if the variable '
+ 'has no unit.')
return ret.pint.quantify()
# for performing analyses
- def perform(self, *manipulations: AbstractManipulation, dropna: bool = False, only_new: bool = False):
- # pivot dataframe before manipulation
+ def perform(self,
+ *manipulations: AbstractManipulation,
+ dropna: bool = False,
+ only_new: bool = False):
+ """
+ Perform manipulation(s).
+
+ Parameters
+ ----------
+ manipulations : AbstractManipulation
+ The manipulations to apply to the dataframe.
+ dropna : bool
+ Whether to drop nan rows at the end.
+ only_new : bool
+ Whether to only keep new variables.
+
+ Returns
+ -------
+ pd.DataFrame
+ The dataframe that underwent the manipulation(s).
+ """
+ # Pivot dataframe before manipulation.
df_pivot = self.pivot_wide()
- # perform analysis or manipulation and bring rows back to original long dataframe format
+ # Perform analysis or manipulation and bring rows back to
+ # original long dataframe format.
for manipulation in manipulations:
original_index = df_pivot.index
df_pivot = manipulation.perform(df_pivot)
@@ -168,44 +245,75 @@ def perform(self, *manipulations: AbstractManipulation, dropna: bool = False, on
if not df_pivot.index.equals(original_index):
raise Exception('Manipulation may not change the index.')
- # ensure that the axis label still exists before melt
+ # Ensure that the axis label still exists before melt.
df_pivot.rename_axis('variable', axis=1, inplace=True)
- # pivot back
+ # Pivot back.
ret = df_pivot \
.pint.dequantify() \
.melt(ignore_index=False) \
.reset_index()
- # drop rows with na entries in unit or value columns
+ # Drop rows with nan entries in unit or value columns.
if dropna:
ret.dropna(subset=['unit', 'value'], inplace=True)
- # keep only new variables
+ # Keep only new variables if requested.
if only_new:
ret = ret.loc[~ret['variable'].isin(self._df['variable'].unique())]
- # drop unfielded if exists
+ # Drop column called 'unfielded' if it exists.
if 'unfielded' in ret.columns:
ret = ret.drop(columns='unfielded')
- # return
+ # Return dataframe.
return ret.reset_index(drop=True)
- # for splitting variable components into separate columns
- def varsplit(self, cmd: Optional[str] = None, regex: Optional[str] = None, target: str = 'variable',
- new: Optional[str | bool] = True, keep_unmatched: bool = False):
- # check that precisely one of the two arguments (cmd and regex) is provided
+ def varsplit(self,
+ cmd: Optional[str] = None,
+ regex: Optional[str] = None,
+ target: str = 'variable',
+ new: Optional[str | bool] = True,
+ keep_unmatched: bool = False) -> pd.DataFrame:
+ """
+ Split variable components separated by pipe characters into
+ separate columns. The pattern must either be provided as
+ `cmd` or as `regex`.
+
+ Parameters
+ ----------
+ cmd : str
+ A command to interpret into a regex.
+ regex : str
+ A direct regex.
+ target : str
+ (Optional) The name of the column where the new
+ variable will be stored.
+        new : str | bool
+            (Optional) The new target variable name; if True, it is
+            derived from `cmd`, and if False, no new name is set.
+ keep_unmatched : bool
+ Whether or not to keep unmatched rows.
+
+ Returns
+ -------
+ pd.DataFrame
+ The dataframe that contains the new split variables.
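+
+        Examples
+        --------
+        Illustrative only; the variable and column names are
+        assumptions. A command such as 'Tech|?process|?component'
+        would match a variable like 'Tech|Electrolysis|CAPEX' and
+        move 'Electrolysis' and 'CAPEX' into new 'process' and
+        'component' columns:
+
+        >>> df.team.varsplit('Tech|?process|?component')  # doctest: +SKIP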
+ """
+ # Check that precisely one of the two arguments (either `cmd`
+ # or `regex`) is provided.
if cmd is not None and regex is not None:
- raise Exception('Only one of the two arguments may be provided: cmd or regex.')
+ raise Exception(
+ 'Only one of the two arguments may be provided: cmd or regex.')
if cmd is None and regex is None:
- raise Exception('Either a command or a regex string must be provided.')
+ raise Exception(
+ 'Either a command or a regex string must be provided.')
- # check that target is in columns of dataframe
+ # Check that target is in columns of dataframe.
if target not in self._df.columns:
- raise Exception(f"Could not find column of name '{target}' in dataframe.")
+ raise Exception(f"Could not find column of name '{target}' in "
+ f"dataframe.")
- # determine regex from cmd if necessary
+ # Determine regex from cmd if necessary.
if regex is None:
regex = '^' + r'\|'.join([
rf'(?P<{t[1:]}>[^|]*)' if t[0] == '?' else
@@ -214,75 +322,135 @@ def varsplit(self, cmd: Optional[str] = None, regex: Optional[str] = None, targe
for t in cmd.split('|')
]) + '$'
- # determine value of new variable column from arguments
+ # Determine value of new variable column from arguments.
if new is False:
new = None
elif new is True:
if cmd is None:
new = None
else:
- new = '|'.join([t for t in cmd.split('|') if t[0] not in ('?', '*')])
+ new = '|'.join([
+ t for t in cmd.split('|')
+ if t[0] not in ('?', '*')
+ ])
- # create dataframe to be returned by applying regex to variable column and dropping unmatched rows
+ # Create dataframe to be returned by applying regex to variable
+ # column and dropping unmatched rows.
matched = self._df[target].str.extract(regex)
- # drop unmatched rows if requested
+ # Drop unmatched rows if requested.
is_unmatched = matched.isna().any(axis=1)
matched = matched.drop(index=matched.loc[is_unmatched].index)
- # assign new variable column and drop if all are nan
+ # Assign new variable column and drop if all are nan.
if target not in matched:
cond = matched.notnull().any(axis=1)
matched[target] = self._df[target]
matched.loc[cond, target] = new or np.nan
if new is None:
- warnings.warn('New target column could not be set automatically.')
+ warnings.warn('New target column could not be set '
+ 'automatically.')
- # drop variable column if all nan
+ # Drop variable column if all nan.
if matched[target].isnull().all():
matched.drop(columns=target, inplace=True)
- # combine with original dataframe
+ # Combine with original dataframe.
if keep_unmatched:
- df_combine = self._df.assign(**{target: lambda df: df[target].where(is_unmatched)})
+ df_combine = self._df.assign(**{
+ target: lambda df: df[target].where(is_unmatched)
+ })
else:
df_combine = self._df.loc[matched.index].drop(columns=target)
ret = matched.combine_first(df_combine)
- # sort columns
+ # Sort columns.
order = matched.columns.tolist() + self._df.columns.tolist()
- ret.sort_index(key=lambda cols: [order.index(c) for c in cols], axis=1, inplace=True)
+ ret.sort_index(
+ key=lambda cols: [order.index(c) for c in cols],
+ axis=1,
+ inplace=True,
+ )
- # return
+ # Return dataframe.
return ret
- # combine columns into new variable
- def varcombine(self, cmd: str | Callable, keep_cols: bool = False, target: str = 'variable'):
+ def varcombine(self,
+ cmd: str | Callable,
+ keep_cols: bool = False,
+ target: str = 'variable') -> pd.DataFrame:
+ """
+ Combine columns into new variable (or other column).
+
+ Parameters
+ ----------
+ cmd : str | Callable
+ How the new variable (or other column) should be assembled.
+ keep_cols : bool
+ Whether to keep the used columns.
+ target : str
+ (Optional) The name of the target column. By default, this
+ will be called `variable`.
+
+ Returns
+ -------
+ pd.DataFrame
+ The updated dataframe.
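+
+        Examples
+        --------
+        Illustrative only; the column names are assumptions:
+
+        >>> df.team.varcombine('{variable}|{fuel}')  # doctest: +SKIP
+
+        combines the 'variable' and 'fuel' columns of each row into a
+        new 'variable' entry such as 'Cost|Natural Gas' and drops the
+        used columns unless keep_cols is set.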
+ """
ret = self._df.assign(**{
- target: self._df.apply(lambda row: cmd.format(**row) if isinstance(cmd, str) else cmd(row), axis=1),
+ target: self._df.apply(lambda row:
+ cmd.format(**row) if isinstance(cmd, str) else cmd(row),
+ axis=1),
})
return ret if keep_cols else ret.filter([
col for col in ret
- if col == target or (isinstance(cmd, Callable) or f"{{{col}}}" not in cmd)
+ if col == target or
+ (isinstance(cmd, Callable) or f"{{{col}}}" not in cmd)
])
- # convert units
- def unit_convert(self, to: str | pint.Unit | dict[str, str | pint.Unit], flow_id: Optional[str] = None):
+ def unit_convert(self,
+ to: str | pint.Unit | dict[str, str | pint.Unit],
+ flow_id: Optional[str] = None):
+ """
+ Convert units in dataframe.
+
+ Parameters
+ ----------
+ to : str | pint.Unit | dict[str, str | pint.Unit]
+ The unit to convert to. This is either one unit for all rows
+ or a dict that maps variables to units.
+ flow_id : str
+ (Optional) The flow ID for converting flow units.
+
+ Returns
+ -------
+ pd.DataFrame
+ The dataframe with updated units.
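+
+        Examples
+        --------
+        Illustrative only; the variable and unit names are
+        assumptions:
+
+        >>> df.team.unit_convert('MWh')  # doctest: +SKIP
+        >>> df.team.unit_convert(
+        ...     {'Cost|Natural Gas': 'EUR/MWh'})  # doctest: +SKIP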
+ """
+
return self._df.assign(
- unit_to=to if not isinstance(to, dict) else self._df.apply(
- lambda row: to[row['variable']] if row['variable'] in to else row['unit'], axis=1,
- ),
- value=lambda df: df.apply(
- lambda row: row['value'] * unit_convert(row['unit'], row['unit_to'], flow_id=flow_id), axis=1,
- ),
- ) \
+ unit_to=to if not isinstance(to, dict) else self._df.apply(
+ lambda row: (
+ to[row['variable']]
+ if row['variable'] in to else
+ row['unit']
+ ), axis=1,
+ ),
+ value=lambda df: df.apply(
+ lambda row: row['value'] * unit_convert(
+ row['unit'], row['unit_to'], flow_id=flow_id),
+ axis=1,
+ ),
+ ) \
.drop(columns='unit') \
.rename(columns={'unit_to': 'unit'})
-# new variable can be calculated through expression assignments or keyword assignments
-# expression assignments are of form "`a` = `b` + `c`" and are based on the pandas eval functionality
-# keyword assignment must be int, float, string, or a function to be called to assign to the variable defined as key
+# New variables can be calculated through expression assignments or
+# keyword assignments. Expression assignments are of the form
+# "`a` = `b` + `c`" and are based on the pandas eval functionality.
+# Keyword assignments must be an int, float, string, or a function to
+# be called to assign to the variable defined as key.
ExprAssignment: TypeAlias = str
KeywordAssignment: TypeAlias = int | float | str | Callable
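+
+# An illustrative (hypothetical) use of both assignment styles:
+#
+#     df.team.perform(CalcVariable(
+#         '`Total Cost` = `CAPEX` + `OPEX Fixed`',
+#         **{'Assumed Price': 100.0},
+#     ))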
@@ -292,17 +460,22 @@ class CalcVariable(AbstractManipulation):
_expr_assignments: tuple[ExprAssignment]
_kw_assignments: dict[str, KeywordAssignment]
- def __init__(self, *expr_assignments: ExprAssignment, **kw_assignments: KeywordAssignment):
+ def __init__(self,
+ *expr_assignments: ExprAssignment,
+ **kw_assignments: KeywordAssignment):
self._expr_assignments = expr_assignments
self._kw_assignments = kw_assignments
# check all supplied arguments are valid
for expr_assignment in self._expr_assignments:
if not isinstance(expr_assignment, str):
- raise Exception(f"Expression assignments must be of type str, but found: {type(expr_assignment)}")
+ raise Exception(f"Expression assignments must be of type str, "
+ f"but found: {type(expr_assignment)}")
for kw_assignment in self._kw_assignments.values():
- if not (isinstance(kw_assignment, int | float | str) or callable(kw_assignment)):
- raise Exception(f"Keyword assignments must be of type int, float, string, or callable, but found: "
+ if not (isinstance(kw_assignment, int | float | str) or
+ callable(kw_assignment)):
+ raise Exception(f"Keyword assignments must be of type int, "
+ f"float, string, or callable, but found: "
f"{type(kw_assignment)}")
def perform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -327,14 +500,20 @@ def __init__(self,
sc_demand: Optional[dict[str, dict[str, pint.Quantity]]] = None,
):
if process_diagram is None and process_tree is None:
- raise Exception('Either the process_diagram or the process_tree argument must be provided.')
+ raise Exception('Either the process_diagram or the process_tree '
+ 'argument must be provided.')
if process_diagram is not None and process_tree is not None:
- raise Exception('The process_diagram and process_tree arguments cannot both be provided.')
+ raise Exception('The process_diagram and process_tree arguments '
+ 'cannot both be provided.')
self._name = name
self._demand = demand
self._sc_demand = sc_demand
- self._proc_graph = self._read_diagram(process_diagram) if process_diagram is not None else process_tree
+ self._proc_graph = (
+ self._read_diagram(process_diagram)
+ if process_diagram is not None else
+ process_tree
+ )
self._flows = list({
flow
for proc_edges in self._proc_graph.values()
@@ -354,8 +533,9 @@ def proc_graph(self) -> dict[str, dict[str, list[str]]]:
# get process graph as igraph object for plotting
def igraph(self) -> Tuple[Graph, Layout]:
if not HAS_IGRAPH:
- raise ImportError("Need to install the `igraph` package first. Please run `pip install igraph` or `poetry "
- "add igraph`.")
+ raise ImportError("Need to install the `igraph` package first. "
+ "Please run `pip install igraph` or `poetry add "
+ "igraph`.")
procs = list(self._proc_graph.keys())
graph = igraph.Graph(
@@ -368,7 +548,11 @@ def igraph(self) -> Tuple[Graph, Layout]:
],
)
graph.vs['name'] = procs
- graph.es['name'] = [flow for p1 in procs for flow in self._proc_graph[p1]]
+ graph.es['name'] = [
+ flow
+ for p1 in procs
+ for flow in self._proc_graph[p1]
+ ]
layout = graph.layout_reingold_tilford(root=[len(graph.vs) - 1])
layout.rotate(angle=90)
@@ -384,7 +568,8 @@ def _reduce_subdiagram(subdiagram: str) -> tuple[str, str, str]:
if len(components) == 1:
processes.append((token.strip(' '), None))
elif len(components) == 2:
- processes.append((components[0].strip(' '), components[1].strip(' ')))
+ processes.append((components[0].strip(' '),
+ components[1].strip(' '),))
else:
raise Exception(f"Too many consecutive `->` in diagram.")
@@ -392,7 +577,8 @@ def _reduce_subdiagram(subdiagram: str) -> tuple[str, str, str]:
proc, flow = processes[i]
proc2 = processes[i + 1][0] if i + 1 < len(processes) else None
if flow is None and i + 1 < len(processes):
- raise Exception(f"Flow must be provided for processes feeding into downstream processes: {subdiagram}")
+ raise Exception(f"Flow must be provided for processes feeding "
+ f"into downstream processes: {subdiagram}")
yield proc, flow, proc2
# read the full diagram
@@ -408,7 +594,9 @@ def _read_diagram(diagram: str) -> dict[str, dict[str, list[str]]]:
if proc2 is not None:
out[proc][flow].append(proc2)
else:
- out[proc] |= {flow: ([proc2] if proc2 is not None else [])}
+ out[proc] |= {
+ flow: ([proc2] if proc2 is not None else [])
+ }
else:
out[proc] = {flow: ([proc2] if proc2 is not None else [])}
if proc2 is not None and proc2 not in out:
@@ -426,7 +614,8 @@ def _perform_row(self, row: pd.Series) -> pd.Series:
[
+ row[f"Tech|{proc1}|Output|{flow}"].m
if proc1 == proc2 else
- - row[f"Tech|{proc2}|Input|{flow}"].to(row[f"Tech|{proc1}|Output|{flow}"].u).m
+ - row[f"Tech|{proc2}|Input|{flow}"] \
+ .to(row[f"Tech|{proc1}|Output|{flow}"].u).m
if proc2 in proc1_flow_targets else
0.0
for proc2 in self._proc_graph
@@ -437,7 +626,8 @@ def _perform_row(self, row: pd.Series) -> pd.Series:
# obtain demand
d = np.array([
- self._demand[proc1][flow].to(row[f"Tech|{proc1}|Output|{flow}"].u).m
+ self._demand[proc1][flow]
+ .to(row[f"Tech|{proc1}|Output|{flow}"].u).m
if proc1 in self._demand and flow in self._demand[proc1] else
0.0
for proc1 in self._proc_graph
@@ -462,7 +652,8 @@ def _perform_row(self, row: pd.Series) -> pd.Series:
d = np.concatenate([
d,
[
- self._sc_demand[proc1][flow].to(row[f"Tech|{proc1}|Output|{flow}"].u).m
+ self._sc_demand[proc1][flow].to(
+ row[f"Tech|{proc1}|Output|{flow}"].u).m
for proc1 in self._sc_demand
for flow in self._sc_demand[proc1]
]
@@ -487,26 +678,43 @@ def _perform_row(self, row: pd.Series) -> pd.Series:
# calculate levelised cost of X
class LCOX(AbstractManipulation):
+ """
+ Calculate levelised cost of X (LCOX).
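+
+    An illustrative call (the reference and process names are
+    assumptions):
+
+    >>> df.team.perform(LCOX(
+    ...     reference='Electrolysis|Output|Hydrogen',
+    ...     process='Electrolysis',
+    ...     interest_rate=0.1,
+    ...     book_lifetime=18.0,
+    ... ))  # doctest: +SKIP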
+ """
_name: str
_reference: str
_process: str
_interest_rate: Optional[float]
_book_lifetime: Optional[float]
- def __init__(self, reference: str, process: Optional[str] = None, process_chain: Optional[str] = None,
- name: Optional[str] = None, interest_rate: Optional[float] = None,
+ def __init__(self,
+ reference: str,
+ process: Optional[str] = None,
+ process_chain: Optional[str] = None,
+ name: Optional[str] = None,
+ interest_rate: Optional[float] = None,
book_lifetime: Optional[float] = None):
if process is None and process_chain is None:
- raise Exception('Either process or vc must be provided as an argument.')
+            raise Exception('Either process or process_chain must be '
+                            'provided as an argument.')
elif process is not None and process_chain is not None:
- raise Exception('Only one of process and vc must be provided as an argument.')
+            raise Exception('Only one of process and process_chain may '
+                            'be provided as an argument.')
self._reference = reference
self._process = process
self._process_chain = process_chain
self._name = name if name is not None else process if process is not None else process_chain
- self._interest_rate = interest_rate * U('') if isinstance(interest_rate, int | float) else interest_rate
- self._book_lifetime = book_lifetime * U('a') if isinstance(book_lifetime, int | float) else book_lifetime
+ self._interest_rate = (
+ interest_rate * U('')
+ if isinstance(interest_rate, int | float) else
+ interest_rate
+ )
+ self._book_lifetime = (
+ book_lifetime * U('a')
+ if isinstance(book_lifetime, int | float) else
+ book_lifetime
+ )
# perform
def perform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -518,18 +726,26 @@ def perform(self, df: pd.DataFrame) -> pd.DataFrame:
return pd.concat([df, ret], axis=1)
else:
# get functional units
- func_units = self._varsplit(df, f"Process Chain|{self._process_chain}|Functional Unit|?process")
+ func_units = self._varsplit(df, f"Process Chain|"
+ f"{self._process_chain}|"
+ f"Functional Unit|?process"
+ )
if func_units.empty:
- raise Exception(f"Process chain '{self._process_chain}' could not be found. Make sure you performed "
- f"performed the process chain manipulation on the dataframe first to determine the "
- f"functional units and make sure the spelling of the process chain is correct.")
+ raise Exception(f"Process chain '{self._process_chain}' could "
+ f"not be found. Make sure you performed the "
+ f"process chain manipulation on the dataframe "
+ f"first to determine the functional units and "
+ f"make sure the spelling of the process chain "
+ f"is correct.")
# loop over processes in process chain
ret_list = []
for process in func_units.columns:
- # calculate levelised cost of process, prepend "LCOX|{name}|{process}|" before variable names, divide
+ # calculate levelised cost of process, prepend
+ # "LCOX|{name}|{process}|" before variable names, divide
# by reference, and multiply by functional unit
ret = self.calc_cost(df, process) \
- .rename(columns=lambda var: f"LCOX|{self._name}|{process}|{var}") \
+ .rename(columns=lambda var: f"LCOX|{self._name}|"
+ f"{process}|{var}") \
.apply(lambda col: col / df[f"Tech|{self._reference}"] * func_units[process])
ret_list.append(ret)
return pd.concat([df] + ret_list, axis=1)
@@ -538,19 +754,26 @@ def perform(self, df: pd.DataFrame) -> pd.DataFrame:
def calc_cost(self, df: pd.DataFrame, process: str) -> pd.DataFrame:
tech = self._varsplit(df, f"Tech|{process}|?variable")
prices = self._varsplit(df, 'Price|?io')
- iocaps = self._varsplit(df, regex=fr"Tech\|{re.escape(process)}\|((?:Input|Output) Capacity\|.*)")
- ios = self._varsplit(df, regex=fr"Tech\|{re.escape(process)}\|((?:Input|Output)\|.*)")
+ iocaps = self._varsplit(df, regex=fr"Tech\|{re.escape(process)}\|"
+ fr"((?:Input|Output) Capacity\|.*)",
+ )
+ ios = self._varsplit(df, regex=fr"Tech\|{re.escape(process)}\|"
+ fr"((?:Input|Output)\|.*)",
+ )
# determine reference capacity and reference of that reference capacity for CAPEX and OPEX Fixed
if any(c in tech for c in ('CAPEX', 'OPEX Fixed')):
try:
cap = iocaps.iloc[:, 0]
- capref = ios[re.sub(r'(Input|Output) Capacity', r'\1', cap.name)]
+ c = re.sub(r'(Input|Output) Capacity', r'\1', cap.name)
+ capref = ios[c]
except IndexError:
- warnings.warn('Could not find a reference capacity for CAPEX and OPEX columns.')
+ warnings.warn('Could not find a reference capacity for CAPEX '
+ 'and OPEX columns.')
cap = capref = None
except KeyError:
- warnings.warn('Could not find reference matching the reference capacity.')
+ warnings.warn('Could not find reference matching the reference '
+ 'capacity.')
capref = None
else:
cap = capref = None
@@ -583,29 +806,36 @@ def calc_cost(self, df: pd.DataFrame, process: str) -> pd.DataFrame:
continue
# inputs are counted as costs, outputs are counted as revenues
sign = +1 if io_type == 'Input' else -1
- ret[f"{io_type} {'Cost' if io_type == 'Input' else 'Revenue'}|{io_flow}"] = sign * ios[io] * prices[io_flow]
+ c = (f"{io_type} {'Cost' if io_type == 'Input' else 'Revenue'}|"
+ f"{io_flow}")
+ ret[c] = sign * ios[io] * prices[io_flow]
# warn about unused variables
if unused:
- warnings.warn(f"The following inputs/outputs are not used in LCOX, because they are neither the reference "
- f"nor is an associated price given: {', '.join(unused)}")
+ warnings.warn(f"The following inputs/outputs are not used in "
+                          f"LCOX because they are neither the reference "
+                          f"nor have an associated price: "
+ f"{', '.join(unused)}")
return ret
-# calculate fuel-switching carbon price
class FSCP(AbstractManipulation):
+ """
+ Calculate fuel-switching carbon price (FSCP).
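+
+    For each ordered pair of fuels x and y, the FSCP is computed from
+    the corresponding cost and greenhouse-gas intensity columns as
+    (Cost|y - Cost|x) / (GHGI|x - GHGI|y) and stored in a new column
+    'FSCP|x to y'.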
+ """
_fuels: tuple[str]
def __init__(self, *fuels: str):
self._fuels = fuels
- # perform
def perform(self, df: pd.DataFrame) -> pd.DataFrame:
for id_x, fuel_x in enumerate(self._fuels):
for id_y, fuel_y in enumerate(self._fuels):
if id_x < id_y:
- df[f"FSCP|{fuel_x} to {fuel_y}"] = (df[f"Cost|{fuel_y}"] - df[f"Cost|{fuel_x}"]) / (
- df[f"GHGI|{fuel_x}"] - df[f"GHGI|{fuel_y}"])
+ df[f"FSCP|{fuel_x} to {fuel_y}"] = (
+ (df[f"Cost|{fuel_y}"] - df[f"Cost|{fuel_x}"])
+ / (df[f"GHGI|{fuel_x}"] - df[f"GHGI|{fuel_y}"])
+ )
return df
diff --git a/python/posted/tedf.py b/python/posted/tedf.py
index 7b7fa7e..2d55a3f 100644
--- a/python/posted/tedf.py
+++ b/python/posted/tedf.py
@@ -11,7 +11,8 @@
class TEDFInconsistencyException(Exception):
- """Exception raised for inconsistencies in TEDFs.
+ """
+ Exception raised for inconsistencies in TEDFs.
Attributes:
message -- message explaining the inconsistency
@@ -26,7 +27,7 @@ def __init__(self, message: str = "Inconsistency detected", row_id: None | int =
self.col_id: None | str = col_id
self.file_path: None | Path = file_path
- # add tokens at the end of the error message
+ # Add tokens at the end of the error message.
message_tokens = []
if file_path is not None:
message_tokens.append(f"file \"{file_path}\"")
@@ -35,10 +36,12 @@ def __init__(self, message: str = "Inconsistency detected", row_id: None | int =
if col_id is not None:
message_tokens.append(f"in column \"{col_id}\"")
- # compose error message from tokens
+ # Compose error message from tokens.
exception_message: str = message
if message_tokens:
- exception_message += f"\n " + (", ".join(message_tokens)).capitalize()
+ exception_message += (
+ f"\n " + (", ".join(message_tokens)).capitalize()
+ )
super().__init__(exception_message)
@@ -58,10 +61,9 @@ def new_inconsistency(raise_exception: bool, **kwargs) -> TEDFInconsistencyExcep
return exception
-
class TEBase:
"""
- Base Class for Technoeconomic Data
+ Base class for techno-economic data.
Parameters
----------
@@ -69,20 +71,25 @@ class TEBase:
Variable from which Data should be collected
"""
# initialise
+
def __init__(self, parent_variable: str):
- """ Set parent variable and technology specifications (var_specs) from input"""
+ """ Set parent variable and technology specifications
+ (var_specs) from input"""
self._parent_variable: str = parent_variable
- self._var_specs: dict = {key: val for key, val in variables.items() if key.startswith(self._parent_variable)}
+ self._var_specs: dict = {
+ key: val for key, val in variables.items()
+ if key.startswith(self._parent_variable)
+ }
@property
def parent_variable(self) -> str:
- """ Get parent variable"""
+ """Get parent variable"""
return self._parent_variable
class TEDF(TEBase):
"""
- Class to store Technoeconomic DataFiles
+ Class to handle Techno-Economic Data Files (TEDFs).
Parameters
----------
@@ -98,25 +105,24 @@ class TEDF(TEBase):
Methods
----------
load
- Load TEDataFile if it has not been read yet
+ Load TEDF if it has not been read yet.
read
- Read TEDF from CSV file
+ Read TEDF from CSV file.
write
- Write TEDF to CSV file
+ Write TEDF to CSV file.
check
- Check if TEDF is consistent
+ Check if TEDF is consistent.
check_row
- Check that row in TEDF is consistent and return all inconsistencies found for row
+ Check that row in TEDF is consistent and return all
+ inconsistencies found for row.
"""
-
- # typed delcarations
+ # Typed declarations.
_df: None | pd.DataFrame
_inconsistencies: dict
_file_path: None | Path
_fields: dict[str, AbstractFieldDefinition]
_columns: dict[str, AbstractColumnDefinition]
-
def __init__(self,
parent_variable: str,
database_id: str = 'public',
@@ -131,24 +137,24 @@ def __init__(self,
self._file_path = (
None if data is not None else
file_path if file_path is not None else
- databases[database_id] / 'tedfs' / ('/'.join(self._parent_variable.split('|')) + '.csv')
+ (databases[database_id] / 'tedfs' /
+ ('/'.join(self._parent_variable.split('|')) + '.csv'))
)
self._fields, comments = read_fields(self._parent_variable)
self._columns = self._fields | base_columns | comments
@property
def file_path(self) -> Path:
- """ Get or set the file File Path"""
+ """ Get or set the file path"""
return self._file_path
@file_path.setter
def file_path(self, file_path: Path):
self._file_path = file_path
-
def load(self):
"""
- load TEDataFile (only if it has not been read yet)
+ Load TEDF (only if it has not been read yet).
Warns
----------
@@ -162,7 +168,8 @@ def load(self):
if self._df is None:
self.read()
else:
- warnings.warn('TEDF is already loaded. Please execute .read() if you want to load from file again.')
+ warnings.warn('TEDF is already loaded. Please execute .read() if '
+ 'you want to load from file again.')
return self
@@ -177,9 +184,10 @@ def read(self):
"""
if self._file_path is None:
- raise Exception('Cannot read from file, as this TEDF object has been created from a dataframe.')
+ raise Exception('Cannot read from file, as this TEDF object has '
+ 'been created from a dataframe.')
- # read CSV file
+ # Read CSV file.
self._df = pd.read_csv(
self._file_path,
sep=',',
@@ -187,14 +195,16 @@ def read(self):
encoding='utf-8',
)
- # check column IDs match base columns and fields
+ # Check column IDs match base columns and fields.
if not all(c in self._columns for c in self._df.columns):
- raise Exception(f"Column IDs used in CSV file do not match columns definition: {self._df.columns.tolist()}")
+ raise Exception(f"Column IDs used in CSV file do not match "
+ f"columns definition: {self._df.columns.tolist()}")
- # adjust row index to start at 1 instead of 0
+ # Adjust row index to start at 1 instead of 0.
self._df.index += 1
- # insert missing columns and reorder via reindexing, then update dtypes
+ # Insert missing columns and reorder via reindexing, then
+ # update dtypes.
df_new = self._df.reindex(columns=list(self._columns.keys()))
for col_id, col in self._columns.items():
if col_id in self._df:
@@ -213,8 +223,9 @@ def write(self):
If there is no file path that specifies where to write
"""
if self._file_path is None:
- raise Exception('Cannot write to file, as this TEDataFile object has been created from a dataframe. Please '
- 'first set a file path on this object.')
+ raise Exception('Cannot write to file, as this TEDataFile object '
+ 'has been created from a dataframe. Please first '
+ 'set a file path on this object.')
self._df.to_csv(
self._file_path,
@@ -225,7 +236,6 @@ def write(self):
na_rep='',
)
-
@property
def data(self) -> pd.DataFrame:
"""Get data, i.e. access dataframe"""
@@ -249,7 +259,10 @@ def check(self, raise_exception: bool = True):
# check row consistency for each row individually
for row_id in self._df.index:
- self._inconsistencies[row_id] = self.check_row(row_id, raise_exception=raise_exception)
+ self._inconsistencies[row_id] = self.check_row(
+ row_id=row_id,
+ raise_exception=raise_exception,
+ )
def check_row(self, row_id: int, raise_exception: bool) -> list[TEDFInconsistencyException]:
"""
@@ -268,20 +281,25 @@ def check_row(self, row_id: int, raise_exception: bool) -> list[TEDFInconsistenc
List of inconsistencies
"""
row = self._df.loc[row_id]
- ikwargs = {'row_id': row_id, 'file_path': self._file_path, 'raise_exception': raise_exception}
+ ikwargs = {
+ 'row_id': row_id,
+ 'file_path': self._file_path,
+ 'raise_exception': raise_exception,
+ }
ret = []
- # check whether fields are among those defined in the technology specs
+ # Check whether fields are among those defined in the technology specs.
for col_id, col in self._columns.items():
cell = row[col_id]
if col.col_type == 'variable':
- cell = cell if pd.isnull(cell) else self.parent_variable + '|' + cell
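+ # variables in the file are given relative to the parent
+ # variable, so prepend it before validating the cell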
+ cell = (cell if pd.isnull(cell)
+ else self.parent_variable + '|' + cell)
if not col.is_allowed(cell):
ret.append(new_inconsistency(
message=f"Invalid cell for column of type '{col.col_type}': {cell}", col_id=col_id, **ikwargs,
))
- # check that reported and reference units match variable definition
+ # Check that reported and reference units match variable definition.
for col_prefix in ['', 'reference_']:
raw_variable = row[col_prefix + 'variable']
col_id = col_prefix + 'unit'
@@ -290,22 +308,34 @@ def check_row(self, row_id: int, raise_exception: bool) -> list[TEDFInconsistenc
continue
if pd.isnull(raw_variable) or pd.isnull(unit):
ret.append(new_inconsistency(
- message=f"Variable and unit must either both be set or both be unset': {raw_variable} -- {unit}",
- col_id=col_id, **ikwargs,
+ message=f"Variable and unit must either both be set or "
+ f"both be unset': {raw_variable} -- {unit}",
+ col_id=col_id,
+ **ikwargs,
))
variable = self.parent_variable + '|' + raw_variable
var_specs = variables[variable]
if 'dimension' not in var_specs:
if unit is not np.nan:
ret.append(new_inconsistency(
- message=f"Unexpected unit '{unit}' for {col_id}.", col_id=col_id, **ikwargs,
+ message=f"Unexpected unit '{unit}' for {col_id}.",
+ col_id=col_id,
+ **ikwargs,
))
continue
dimension = var_specs['dimension']
flow_id = var_specs['flow_id'] if 'flow_id' in var_specs else None
- allowed, message = unit_allowed(unit=unit, flow_id=flow_id, dimension=dimension)
+ allowed, message = unit_allowed(
+ unit=unit,
+ flow_id=flow_id,
+ dimension=dimension,
+ )
if not allowed:
- ret.append(new_inconsistency(message=message, col_id=col_id, **ikwargs))
+ ret.append(new_inconsistency(
+ message=message,
+ col_id=col_id,
+ **ikwargs,
+ ))
return ret
diff --git a/python/posted/units.py b/python/posted/units.py
index 4212acb..c2e00e6 100644
--- a/python/posted/units.py
+++ b/python/posted/units.py
@@ -32,7 +32,6 @@
}
-
def unit_allowed(unit: str, flow_id: None | str, dimension: str):
'''Checks if a given unit is allowed for a specific dimension and flow ID,
handling unit variants and compatibility checks.
@@ -186,7 +185,6 @@ def unit_convert(unit_from: str | float, unit_to: str | float, flow_id: None | s
return ureg(unit_from).to(unit_to, 'flocon', **ctx_kwargs).magnitude
-
def ctx_kwargs_for_variants(variants: list[str | None], flow_id: str):
'''
Generates a dictionary of context key-word arguments for unit conversion for context from flow specs
@@ -216,7 +214,6 @@ def ctx_kwargs_for_variants(variants: list[str | None], flow_id: str):
return ctx_kwargs
-
def split_off_variant(unit: str):
'''
Takes a unit string and splits it into a pure unit and a variant,
diff --git a/python/units_caching.py b/python/units_caching.py
index d7784d5..2032049 100644
--- a/python/units_caching.py
+++ b/python/units_caching.py
@@ -1,10 +1,8 @@
from posted.config import flows
from posted.units import ureg, unit_convert
-from posted.path import BASE_PATH, DATA_PATH
+from posted.path import DATA_PATH
import os
import pandas as pd
-import pint
-
# check allowed dimensions for a flow type
@@ -13,44 +11,54 @@ def allowed_flow_dims(flow_type: None | str):
allowed_dims = ['[currency]']
else:
flow_type_data = flows[flow_type]
- allowed_dims = [str(ureg.Quantity(flow_type_data['default_unit'].split(';')[0]).dimensionality)] # default units dimension is always accepted
- if(flow_type_data['energycontent_LHV'] == flow_type_data['energycontent_LHV'] or \
+ allowed_dims = [str(ureg.Quantity(flow_type_data['default_unit'].split(
+ ';')[0]).dimensionality)] # default units dim is always accepted
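+ # note: x == x is False for NaN, so these comparisons check whether
+ # an energy content (and, further below, a density) is actually given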
+ if (flow_type_data['energycontent_LHV'] == flow_type_data['energycontent_LHV'] or
flow_type_data['energycontent_HHV'] == flow_type_data['energycontent_HHV']):
if '[length] ** 2 * [mass] / [time] ** 2' not in allowed_dims:
allowed_dims += ['[length] ** 2 * [mass] / [time] ** 2']
- if '[mass]' not in allowed_dims: # [mass] is always accepted when there is a energydensity
+ if '[mass]' not in allowed_dims:  # [mass] is always accepted when there is an energy density
allowed_dims += ['[mass]']
- if(flow_type_data['density_norm'] == flow_type_data['density_norm'] or \
- flow_type_data['density_std'] == flow_type_data['density_std']):
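+ # volume dimensions are only allowed when a density is given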
+ if (flow_type_data['density_norm'] == flow_type_data['density_norm'] or
+ flow_type_data['density_std'] == flow_type_data['density_std']):
allowed_dims += ['[volume]']
allowed_dims += ['[length] ** 3']
- if '[mass]' not in allowed_dims: # [mass] is always accepted when there is a energydensity
+ if '[mass]' not in allowed_dims:  # [mass] is always accepted when there is an energy density
allowed_dims += ['[mass]']
return allowed_dims
-# ----- Collect a list of all unique units that appear in all the inout data files
+# Collect a list of all unique units that appear in all the input data files
compatible_units = []
-# Define a list of all units, that should have conversion factors from and to, regardless of their occurrence in data files
-standard_units = ["kWh", "MWh", "GWh", "t", "kg"]
-standard_units_per_year = [unit +"/a" for unit in standard_units]
-standard_units_per_day = [unit +"/d" for unit in standard_units]
-standard_units_per_hour = [unit +"/h" for unit in standard_units]
+# Define a list of units that should have conversion factors to and from them,
+# regardless of their occurrence in the data files
+standard_units = ["kW", "MW", "GW", "kWh", "MWh", "GWh", "t", "kg"]
+standard_units_per_year = [unit + "/a" for unit in standard_units]
+standard_units_per_day = [unit + "/d" for unit in standard_units]
+standard_units_per_hour = [unit + "/h" for unit in standard_units]
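+# time-integrated variants of the standard units (e.g. "a*kW", "h*MWh")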
+standard_units_times_year = ["a*" + unit for unit in standard_units]
+standard_units_times_day = ["d*" + unit for unit in standard_units]
+standard_units_times_hour = ["h*" + unit for unit in standard_units]
+
+
+standard_units = (
+ standard_units + standard_units_per_year + standard_units_per_day
+ + standard_units_per_hour + standard_units_times_day
+ + standard_units_times_hour + standard_units_times_year
+)
-standard_units = standard_units + standard_units_per_year + standard_units_per_day + standard_units_per_hour
# Create an empty DataFrame to store the appended data
appended_data = pd.DataFrame()
# Loop through all ted files
-for filename in os.listdir(DATA_PATH /'database/tedfs/Tech/'):
- filepath = os.path.join(DATA_PATH /'database/tedfs/Tech/', filename)
+for filename in os.listdir(DATA_PATH / 'database/tedfs/Tech/'):
+ filepath = os.path.join(DATA_PATH / 'database/tedfs/Tech/', filename)
# Check if the file is a CSV file
if filename.endswith(".csv"):
-
- # Read the ted file and extract only columns "reported_unit" and "reference_unit"
+ # Read the ted file and extract only columns
+ # "reported_unit" and "reference_unit"
data = pd.read_csv(filepath, usecols=["unit", "reference_unit"])
# Append the data to the main DataFrame
@@ -58,9 +66,9 @@ def allowed_flow_dims(flow_type: None | str):
# Get unique values from columns "reported_unit" and "reference_unit"
unique_values = appended_data[["unit", "reference_unit"]].values.ravel()
-print(unique_values)
+
unique_values = pd.unique(unique_values).tolist()
-unique_values.append("MWh/a")
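+# always include the standard units in the cache,
+# even if they do not occur in the data files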
+unique_values = list(set(unique_values + standard_units))
# define unit sets
mass_units = []
@@ -68,7 +76,8 @@ def allowed_flow_dims(flow_type: None | str):
energy_units = []
other_units = []
-# ----- Divide the found units into categories based on dimension and append extensions to them (LHV/HHV/norm/standard)
+# ----- Divide the found units into categories based on
+# dimension and append extensions to them (LHV/HHV/norm/standard)
# loop through all found unqiue unit entries
for unit_str in unique_values:
@@ -106,7 +115,9 @@ def allowed_flow_dims(flow_type: None | str):
elif unit.dimensionality == '[mass]':
# mass units are not augmented
mass_units.append(unit_str)
- # all units without extra info are the units that are convertable without a flow_type
+ # all units without extra info are the units
+ # that are convertible without a flow_type
other_units.append(unit_str)
# TODO: do automatic adjustment on the base year defined in units.py
@@ -117,20 +128,24 @@ def allowed_flow_dims(flow_type: None | str):
other_units.append("USD_2005/a")
-# ----- Define all possible conversions for each entry type and additionally for a missing entry type
+# ----- Define all possible conversions for each entry type
+# and additionally for a missing entry type
# define conversion set
conversions = []
# add all other units to enable conversion without specifying the flow_type
for unit_from in other_units:
- # iterate over all the commpatible units for the unit_from and unit_to variable to create all possible combinations
+ # iterate over all the compatible units for the unit_from and
+ # unit_to variables to create all possible combinations
for unit_to in other_units:
- if(unit_from != unit_to):
- conversion = dict(unit_from=unit_from, unit_to=unit_to, flow_type = '')
+ if (unit_from != unit_to):
+ conversion = dict(unit_from=unit_from,
+ unit_to=unit_to, flow_type='')
conversions.append(conversion)
-# for reference_unit, all combinations disregarding the flow_type limitations are added
+# for reference_unit, all combinations disregarding
+# the flow_type limitations are added
# iterate over all flow types
for flow_type in flows.keys():
# get allowed dimensions for the flow type
@@ -138,7 +153,8 @@ def allowed_flow_dims(flow_type: None | str):
# define a set of all possible units for this flow type
compatible_units = []
- # add units to the compatible units set depending on whether flow types allowed dimensions
+ # add units to the compatible units set depending on
+ # the flow type's allowed dimensions
if ('[mass]' in allowed_dims):
compatible_units += mass_units
@@ -150,27 +166,69 @@ def allowed_flow_dims(flow_type: None | str):
compatible_units += volume_units
-
- # iterate over all the commpatible units for the unit_from and unit_to variable to create all possible combinations
+ # iterate over all the compatible units for the unit_from
+ # and unit_to variables to create all possible combinations
for unit_from in compatible_units:
for unit_to in compatible_units:
- if(unit_from != unit_to):
+ if (unit_from != unit_to):
# add each combination to the conversions set
- conversion = dict(unit_from=unit_from, unit_to=unit_to, flow_type = flow_type)
+ conversion = dict(unit_from=unit_from,
+ unit_to=unit_to, flow_type=flow_type)
conversions.append(conversion)
# ----- Add combinations of units that are not contained in the data
-conversions.append(dict(unit_from="percent", unit_to="dimensionless", flow_type = ''))
-conversions.append(dict(unit_from="dimensionless", unit_to="percent", flow_type = ''))
-conversions.append(dict(unit_from="pct", unit_to="dimensionless", flow_type = ''))
-conversions.append(dict(unit_from="dimensionless", unit_to="pct", flow_type = ''))
-conversions.append(dict(unit_from="percent", unit_to="pct", flow_type=''))
-conversions.append(dict(unit_from="pct", unit_to="percent", flow_type=''))
-conversions.append(dict(unit_from="hour", unit_to="h", flow_type=''))
-conversions.append(dict(unit_from="h", unit_to="hour", flow_type=''))
-conversions.append(dict(unit_from="a", unit_to="h", flow_type=''))
-conversions.append(dict(unit_from="h", unit_to="a", flow_type=''))
-# ----- Call convUnit for each of the conversions and save the result in the cache dataframe
+conversions.append(dict(
+ unit_from='percent',
+ unit_to='dimensionless',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='dimensionless',
+ unit_to='percent',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='pct',
+ unit_to='dimensionless',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='dimensionless',
+ unit_to='pct',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='percent',
+ unit_to='pct',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='pct',
+ unit_to='percent',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='hour',
+ unit_to='h',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='h',
+ unit_to='hour',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='a',
+ unit_to='h',
+ flow_type='',
+))
+conversions.append(dict(
+ unit_from='h',
+ unit_to='a',
+ flow_type='',
+))
+# ----- Call unit_convert for each of the conversions and
+# save the result in the cache dataframe
# use dictionary list to temporarily store data for better performance
new_row_list = []
@@ -179,40 +237,40 @@ def allowed_flow_dims(flow_type: None | str):
unit_from = conversion['unit_from']
unit_to = conversion['unit_to']
flow_type = ''
- # use try except block to catch Dimensionality errors, only valid combinations will end up in cache and no logic is needed here to check validity
+ # use a try/except block to catch dimensionality errors; only valid
+ # combinations end up in the cache, so no extra validity check is
+ # needed here
try:
-
if conversion['flow_type'] == '':
result = unit_convert(unit_from, unit_to)
-
else:
-
flow_type = conversion['flow_type']
result = unit_convert(unit_from, unit_to, flow_type)
- except: # TODO check if there should be a specific error as in previous version: pint.errors.DimensionalityError:
- # skip this conversion and dont add it to cache
+ # TODO: check whether a specific error should be caught here, as in
+ # the previous version (pint.errors.DimensionalityError)
+ except Exception:
+ # skip this conversion and don't add it to the cache
continue
new_row = {
- "from": unit_from,
- "to": unit_to,
- "ft": flow_type,
- "factor": "{:.9f}".format(result)
+ 'from': unit_from,
+ 'to': unit_to,
+ 'ft': flow_type,
+ 'factor': f"{result:.9f}"
}
- # Append the new row to the dictionary list
+
+ # Append the new row to the dictionary list.
new_row_list.append(new_row)
-# generate dataframe
-dfCache = pd.DataFrame.from_dict(new_row_list)
+# Generate dataframe.
+df_cache = pd.DataFrame.from_dict(new_row_list)
-# save dataframe to csv file
+# Save dataframe to CSV file.
path = DATA_PATH / 'R_unit_cache.csv'
-
-dfCache.to_csv(
- path,
- index=False,
- sep=',',
- quotechar='"',
- encoding='utf-8',
- na_rep='',
- )
-
+df_cache.to_csv(
+ path,
+ index=False,
+ sep=',',
+ quotechar='"',
+ encoding='utf-8',
+ na_rep='',
+)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/noslag.py b/tests/test_noslag.py
similarity index 95%
rename from tests/noslag.py
rename to tests/test_noslag.py
index 3090ab8..5f1d922 100644
--- a/tests/noslag.py
+++ b/tests/test_noslag.py
@@ -1,7 +1,11 @@
import unittest
import os
from posted.noslag import DataSet
-tech_directory = '../inst/extdata/database/tedfs/Tech'
+from posted.path import databases
+
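+# run the tests against every TEDF shipped in the public database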
+database = databases["public"]
+tech_directory = f"{database}/tedfs/Tech"
+
tech_files = os.listdir(tech_directory)
tech_files = [filename.split('.')[0] for filename in tech_files]
diff --git a/tests/sources.py b/tests/test_sources.py
similarity index 100%
rename from tests/sources.py
rename to tests/test_sources.py
diff --git a/tests/team.py b/tests/test_team.py
similarity index 68%
rename from tests/team.py
rename to tests/test_team.py
index 7c57d35..10ab5d3 100644
--- a/tests/team.py
+++ b/tests/test_team.py
@@ -9,14 +9,14 @@ def test_team(self):
import posted.team
data = pd.concat([
- DataSet('Tech|ELH2').aggregate(
+ DataSet('Tech|Electrolysis').aggregate(
period=[2030, 2040], subtech=['AEL', 'PEM'], size=['1 MW', '100 MW'],
- agg=['subtech', 'source'], override={'Tech|ELH2|Output Capacity|h2': 'kW;LHV'},
+ agg=['subtech', 'source'], override={'Tech|Electrolysis|Output Capacity|Hydrogen': 'kW;LHV'},
),
- DataSet('Tech|IDR').aggregate(
+ DataSet('Tech|Iron Direct Reduction').aggregate(
period=[2030, 2040], mode='h2',
),
- DataSet('Tech|EAF').aggregate(
+ DataSet('Tech|Electric Arc furnace').aggregate(
period=[2030, 2040], mode='Primary', reheating='w/ reheating',
),
]).reset_index(drop=True)
diff --git a/tests/tedf.py b/tests/test_tedf.py
similarity index 92%
rename from tests/tedf.py
rename to tests/test_tedf.py
index d85c922..6d95bab 100644
--- a/tests/tedf.py
+++ b/tests/test_tedf.py
@@ -2,8 +2,12 @@
import os
from posted.tedf import TEDF
+from posted.path import databases
+
+
+database = databases["public"]
+tech_directory = f"{database}/tedfs/Tech"
-tech_directory = '../inst/extdata/database/tedfs/Tech'
tech_files = os.listdir(tech_directory)
tech_files = [filename.split('.')[0] for filename in tech_files]
diff --git a/tests/units.py b/tests/test_units.py
similarity index 100%
rename from tests/units.py
rename to tests/test_units.py
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..9108483
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(posted)
+
+test_check("posted")
diff --git a/tests/testthat/test-noslag.R b/tests/testthat/test-noslag.R
new file mode 100644
index 0000000..68b949c
--- /dev/null
+++ b/tests/testthat/test-noslag.R
@@ -0,0 +1,4 @@
+test_that("normalization works", {
+ setwd("../")
+ expect_no_error(DataSet$new('Tech|Electrolysis')$normalise())
+})
diff --git a/tests/testthat/test-tedf.R b/tests/testthat/test-tedf.R
new file mode 100644
index 0000000..8849056
--- /dev/null
+++ b/tests/testthat/test-tedf.R
@@ -0,0 +1,3 @@
+test_that("multiplication works", {
+ expect_equal(2 * 2, 4)
+})
diff --git a/tests/testthat/test-units.R b/tests/testthat/test-units.R
new file mode 100644
index 0000000..8849056
--- /dev/null
+++ b/tests/testthat/test-units.R
@@ -0,0 +1,3 @@
+test_that("multiplication works", {
+ expect_equal(2 * 2, 4)
+})