diff --git a/data/PP/standby_verbruik_demo.ipynb b/demo_standby_verbruik.ipynb
similarity index 96%
rename from data/PP/standby_verbruik_demo.ipynb
rename to demo_standby_verbruik.ipynb
index 57ef794..2e0db94 100644
--- a/data/PP/standby_verbruik_demo.ipynb
+++ b/demo_standby_verbruik.ipynb
@@ -2,14 +2,18 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import polars as pl\n",
"import json\n",
- "import altair as alt"
+ "import altair as alt\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "# %autoreload?"
]
},
{
@@ -28,28 +32,28 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "5.01 μs ± 161 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
+ "6.07 μs ± 115 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"energy_use_df = pl.scan_ndjson(\n",
- " \"energy_use_test1.ndjson\",\n",
+ " \"data/PP/energy_use_test1.ndjson\",\n",
" schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
")"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -79,14 +83,14 @@
"└───────────────────────────────┴───────┘"
]
},
- "execution_count": 24,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"energy_use_lf_1 = pl.scan_ndjson(\n",
- " \"energy_use_test1.ndjson\",\n",
+ " \"data/PP/energy_use_test1.ndjson\",\n",
" schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
")\n",
"energy_use_lf_1.collect().head()"
@@ -101,21 +105,21 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "35.1 ms ± 2.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+ "34.3 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Read the JSON file\n",
- "with open(\"energy_use.json\", \"r\") as file:\n",
+ "with open(\"data/PP/energy_use.json\", \"r\") as file:\n",
" data = json.load(file)\n",
"\n",
"# Convert the data into a list of dictionaries\n",
@@ -131,15 +135,46 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Defining the analysis \n",
+ "# Base Load analysis\n",
"\n",
- "and defining the frames\n",
- "## WHAT is the standby?\n"
+ "## loading in the data"
]
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Base Load: 90570.4W\n",
+ "Daily Usage: 2173.7 kWh\n",
+ "Base Percentage: 65.5%\n"
+ ]
+ }
+ ],
+ "source": [
+ "from openenergyid.baseload.main import main\n",
+ "\n",
+ "metrics = main(\"data/PP/energy_use_big.ndjson\")\n",
+ "# display(metrics)\n",
+ "print(f\"Base Load: {metrics.base_load_watts:.1f}W\")\n",
+ "print(f\"Daily Usage: {metrics.daily_usage_kwh:.1f} kWh\")\n",
+ "print(f\"Base Percentage: {metrics.base_percentage:.1f}%\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Test analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -177,11 +212,41 @@
},
"metadata": {},
"output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
shape: (5, 2)timestamp | total |
---|
datetime[μs, Europe/Brussels] | f64 |
2024-01-01 00:00:00 CET | 51.625 |
2024-01-01 00:15:00 CET | 50.75 |
2024-01-01 00:30:00 CET | 38.5 |
2024-01-01 00:45:00 CET | 40.25 |
2024-01-01 01:00:00 CET | 59.500004 |
"
+ ],
+ "text/plain": [
+ "shape: (5, 2)\n",
+ "┌───────────────────────────────┬───────────┐\n",
+ "│ timestamp ┆ total │\n",
+ "│ --- ┆ --- │\n",
+ "│ datetime[μs, Europe/Brussels] ┆ f64 │\n",
+ "╞═══════════════════════════════╪═══════════╡\n",
+ "│ 2024-01-01 00:00:00 CET ┆ 51.625 │\n",
+ "│ 2024-01-01 00:15:00 CET ┆ 50.75 │\n",
+ "│ 2024-01-01 00:30:00 CET ┆ 38.5 │\n",
+ "│ 2024-01-01 00:45:00 CET ┆ 40.25 │\n",
+ "│ 2024-01-01 01:00:00 CET ┆ 59.500004 │\n",
+ "└───────────────────────────────┴───────────┘"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
"energy_use_lf_1 = pl.scan_ndjson(\n",
- " \"energy_use_big.ndjson\",\n",
+ " \"data/PP/energy_use_big.ndjson\",\n",
" schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
")\n",
"testframe = (\n",
@@ -189,19 +254,20 @@
" # .with_columns(pl.col(\"timestamp\").interpolate(method=\"linear\"))\n",
")\n",
"tf = testframe.collect()\n",
- "display(tf)"
+ "display(tf)\n",
+ "display(energy_use_lf_1.collect().head())"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_44261/1445527080.py:2: DeprecationWarning: `GroupBy.count` is deprecated. It has been renamed to `len`.\n",
+ "/tmp/ipykernel_22265/3491075188.py:2: DeprecationWarning: `GroupBy.count` is deprecated. It has been renamed to `len`.\n",
" value_counts = tf.group_by(\"total\").count().sort(\"total\")\n"
]
},
@@ -210,23 +276,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
@@ -314,7 +380,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -380,7 +446,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -432,7 +498,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -440,23 +506,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
@@ -549,35 +615,31 @@
},
{
"cell_type": "code",
- "execution_count": 47,
- "metadata": {
- "vscode": {
- "languageId": "javascript"
- }
- },
+ "execution_count": 23,
+ "metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
@@ -671,7 +733,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -679,23 +741,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
"alt.LayerChart(...)"
]
},
- "execution_count": 32,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -800,7 +862,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -868,7 +930,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -876,23 +938,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
@@ -974,7 +1036,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -982,23 +1044,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
"alt.Chart(...)"
]
},
- "execution_count": 35,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -1069,7 +1131,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -1077,23 +1139,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
"alt.LayerChart(...)"
]
},
- "execution_count": 36,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -1184,7 +1246,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -1192,23 +1254,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
"alt.LayerChart(...)"
]
},
- "execution_count": 37,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -1273,7 +1335,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -1378,7 +1440,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -1386,23 +1448,23 @@
"text/html": [
"\n",
"\n",
- "\n",
+ "\n",
""
],
"text/plain": [
@@ -1476,114 +1538,6 @@
"kde_chart.display()"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### testing differences between 1. absolute lowest value 2. 1 percent or 3. 5 percent"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [],
- "source": [
- "# import altair as alt\n",
- "\n",
- "# # Assuming df is your DataFrame from the previous analysis\n",
- "# # clear lowest1percent variable\n",
- "# # df = df.with_columns(pl.lit(None).alias(\"lowest_1_percent\"))\n",
- "# # 1. Comparison of Different Standby Measures Over Time\n",
- "# standby_comparison = (\n",
- "# alt.Chart(df)\n",
- "# .transform_fold([\"low_end\", \"lowest_1_percent\", \"lowest_5_percent\"], as_=[\"measure\", \"value\"])\n",
- "# .mark_line()\n",
- "# .encode(\n",
- "# x=\"timestamp:T\",\n",
- "# y=\"value:Q\",\n",
- "# color=alt.Color(\"measure:N\", scale=alt.Scale(scheme=\"category10\")),\n",
- "# tooltip=[\"timestamp:T\", \"value:Q\", \"measure:N\"],\n",
- "# )\n",
- "# .properties(\n",
- "# width=800, height=400, title=\"Comparison of Different Standby Usage Measures Over Time\"\n",
- "# )\n",
- "# )\n",
- "\n",
- "# # 2. Daily Percentage of Total for Each Measure\n",
- "# percentage_comparison = (\n",
- "# alt.Chart(df)\n",
- "# .transform_fold([\"low_end\", \"lowest_1_percent\", \"lowest_5_percent\"], as_=[\"measure\", \"value\"])\n",
- "# .transform_calculate(percentage=\"datum.value / datum.total * 100\")\n",
- "# .mark_line()\n",
- "# .encode(\n",
- "# x=\"timestamp:T\",\n",
- "# y=alt.Y(\"percentage:Q\", axis=alt.Axis(format=\"%\")),\n",
- "# color=alt.Color(\"measure:N\", scale=alt.Scale(scheme=\"category10\")),\n",
- "# tooltip=[\"timestamp:T\", alt.Tooltip(\"percentage:Q\", format=\".2%\"), \"measure:N\"],\n",
- "# )\n",
- "# .properties(\n",
- "# width=800, height=400, title=\"Daily Percentage of Total Energy Use for Each Standby Measure\"\n",
- "# )\n",
- "# )\n",
- "\n",
- "# # 3. Box Plot of Different Measures\n",
- "# box_plot = (\n",
- "# alt.Chart(df)\n",
- "# .transform_fold([\"low_end\", \"lowest_1_percent\", \"lowest_5_percent\"], as_=[\"measure\", \"value\"])\n",
- "# .mark_boxplot()\n",
- "# .encode(x=\"measure:N\", y=\"value:Q\")\n",
- "# .properties(width=400, height=300, title=\"Distribution of Different Standby Measures\")\n",
- "# )\n",
- "\n",
- "# # 4. Scatter Plot: Lowest 1% vs Lowest 5%\n",
- "# scatter_1_5 = (\n",
- "# alt.Chart(df)\n",
- "# .mark_circle()\n",
- "# .encode(\n",
- "# x=\"lowest_1_percent:Q\",\n",
- "# y=\"lowest_5_percent:Q\",\n",
- "# color=alt.Color(\"month(timestamp):N\", scale=alt.Scale(scheme=\"category10\")),\n",
- "# tooltip=[\"timestamp:T\", \"lowest_1_percent:Q\", \"lowest_5_percent:Q\"],\n",
- "# )\n",
- "# .properties(width=400, height=400, title=\"Lowest 1% vs Lowest 5%\")\n",
- "# )\n",
- "\n",
- "# # Combine charts\n",
- "# combined_chart = (standby_comparison & percentage_comparison) | (box_plot & scatter_1_5)\n",
- "\n",
- "# # Display the combined chart\n",
- "# combined_chart.display()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [],
- "source": [
- "# import pandas as pd\n",
- "\n",
- "# import plotly.express as px\n",
- "# #\n",
- "# # Create a histogram\n",
- "# fig = px.histogram(\n",
- "# df,\n",
- "# x=\"timestamp\",\n",
- "# histfunc=\"avg\",\n",
- "# y=[\"total\", \"lowest_1_percent\"],\n",
- "# title=\"Energy use\",\n",
- "# labels={\n",
- "# \"total\": \"Energy use (kWh)\",\n",
- "# \"min_power_usage_per_day\": \"Min Power Usage per Day (kWh)\",\n",
- "# },\n",
- "# barmode=\"overlay\",\n",
- "# )\n",
- "# fig.update_traces(xbins_size=\"604800000\")\n",
- "\n",
- "# fig.show()"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -1593,7 +1547,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -1604,6 +1558,14 @@
"An exception has occurred, use %tb to see the full traceback.\n",
"\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m Stopping the notebook execution here.\n"
]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/root/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3585: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
+ " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
+ ]
}
],
"source": [
@@ -1646,7 +1608,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1829,7 +1791,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"# Read in pandas series from a json file\n",
- "energy_use_lf_1 = pd.read_json(\"energy_use.json\", orient=\"index\")\n",
+ "energy_use_lf_1 = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n",
"energy_use_lf_1.columns = [\"energy_use\"]\n",
"energy_use_lf_1.Name = \"energy_use\"\n",
"display(energy_use_lf_1)\n",
@@ -1975,7 +1937,7 @@
"import numpy as np\n",
"\n",
"# Read in pandas series from a json file\n",
- "energy_use_lf_1 = pd.read_json(\"energy_use.json\", orient=\"index\")\n",
+ "energy_use_lf_1 = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n",
"energy_use_lf_1.columns = [\"energy_use\"]\n",
"energy_use_lf_1.Name = \"energy_use\"\n",
"display(energy_use_lf_1)\n",
@@ -2020,7 +1982,7 @@
"\n",
"# Load and preprocess the data\n",
"\n",
- "data = pd.read_json(\"energy_use.json\", orient=\"index\")\n",
+ "data = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n",
"data.columns = [\"usage\"]\n",
"data.index.name = \"timestamp\"\n",
"data.index = pd.to_datetime(data.index)"
diff --git a/openenergyid/baseload/__init__.py b/openenergyid/baseload/__init__.py
new file mode 100644
index 0000000..cbf7ffb
--- /dev/null
+++ b/openenergyid/baseload/__init__.py
@@ -0,0 +1,15 @@
+"""Base Load analysis for Open Energy ID."""
+
+from .main import (
+ BaseLoadMetrics,
+ EnergySchema,
+ load_data,
+ calculate_base_load,
+)
+
+__all__ = [
+ "BaseLoadMetrics",
+ "EnergySchema",
+ "load_data",
+ "calculate_base_load",
+]
diff --git a/openenergyid/baseload/main.py b/openenergyid/baseload/main.py
new file mode 100644
index 0000000..b02f6bf
--- /dev/null
+++ b/openenergyid/baseload/main.py
@@ -0,0 +1,114 @@
+"""
+This module provides functionality for loading, validating, and analyzing energy usage data.
+
+Classes:
+ BaseLoadMetrics: A NamedTuple container for base load analysis metrics.
+ EnergySchema: A pandera DataFrameModel for validating energy usage data.
+
+Functions:
+ load_data(path: str) -> pl.LazyFrame:
+ Loads and validates energy usage data from an NDJSON file.
+
+ calculate_base_load(lf: pl.LazyFrame) -> BaseLoadMetrics:
+ Calculates base load metrics from energy usage data.
+
+ main(file_path: str) -> BaseLoadMetrics:
+ Processes energy data and returns base load metrics.
+
+ Note:
+ No test helpers are defined in this module; calculate_base_load scales by 4 * 24 (presumably 15-minute readings).
+"""
+
+from typing import NamedTuple
+import polars as pl
+import pandera.polars as pa
+# NOTE: use pandera.polars rather than plain pandera here; the plain import leads to pandas errors.
+
+
+class BaseLoadMetrics(NamedTuple):
+ """Container for base load analysis metrics"""
+
+ base_load_watts: float # Average base load in watts
+ daily_usage_kwh: float # Average daily base-load energy in kWh (filled from avg_daily_kwh, not total daily usage)
+ base_percentage: float # Base load as percentage of total
+
+
+class EnergySchema(pa.DataFrameModel):
+ """Schema for energy usage data validation"""
+
+ timestamp: pl.Datetime = pa.Field(
+ nullable=False,
+ coerce=True,
+ title="Measurement Timestamp",
+ description="Time of energy measurement in Europe/Brussels timezone",
+ )
+ total: float = pa.Field(
+ ge=0, # Power should be non-negative
+ nullable=False,
+ title="Total Power",
+ description="Total power measurement in kW",
+ )
+
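+ # Example of a pandera dataframe-level check. NOTE(review): the polars backend appears to pass a PolarsData wrapper (use data.lazyframe), not a pl.DataFrame — confirm.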
+ @pa.dataframe_check
+ def timestamps_are_ordered(self, data: pl.DataFrame) -> bool:
+ """Check if timestamps are in chronological order"""
+ return data["timestamp"].is_sorted()
+
+
+def load_data(path: str) -> pl.LazyFrame:
+ """Load and validate energy usage data from NDJSON file"""
+ lf = pl.scan_ndjson(
+ path,
+ schema={"timestamp": pl.Datetime(time_zone="Europe/Brussels"), "total": pl.Float64},
+ )
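+ # NOTE(review): validate() runs on the LazyFrame before collect(); pandera's polars backend presumably does only schema-level checks then, so ge=0/nullability/ordering may be skipped — confirm.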
+ validated_df = EnergySchema.validate(lf).collect() # type: ignore
+ return pl.LazyFrame(validated_df)
+
+
+def calculate_base_load(lf: pl.LazyFrame) -> BaseLoadMetrics:
+ """
+ Calculate base load metrics from energy usage data.
+
+ Averages the 10 lowest totals of each day and scales by 4 * 24 to get that day's base-load kWh.
+ Returns the across-day means as watts, kWh, and percentage of each day's summed total.
+ """
+ metrics_df = (
+ lf.filter(pl.col("total") >= 0)
+ .sort("timestamp")
+ .group_by_dynamic("timestamp", every="1d")
+ .agg(
+ [
+ pl.col("total").sum().alias("total_daily_usage"),
+ (pl.col("total").sort().head(10).mean() * 4 * 24).alias("base_load_daily_kwh"),
+ ]
+ )
+ .with_columns(
+ [
+ (pl.col("base_load_daily_kwh") / pl.col("total_daily_usage") * 100).alias(
+ "base_percentage"
+ )
+ ]
+ )
+ .select(
+ [
+ pl.col("base_load_daily_kwh").mean().alias("avg_daily_kwh"),
+ (pl.col("base_load_daily_kwh") * 1000 / 24).mean().alias("avg_watts"),
+ pl.col("base_percentage").mean().alias("avg_percentage"),
+ ]
+ )
+ .collect() # TODO add validation for input data: correct format, not null, etc.
+ )
+
+ return BaseLoadMetrics(
+ base_load_watts=metrics_df[0, "avg_watts"],
+ daily_usage_kwh=metrics_df[0, "avg_daily_kwh"],
+ base_percentage=metrics_df[0, "avg_percentage"],
+ )
+
+
+def main(file_path: str) -> BaseLoadMetrics:
+ """Process energy data and return base load metrics"""
+ lf = load_data(file_path)
+ return calculate_base_load(lf)
diff --git a/vis/basislast/monthly_bar.png b/vis/basislast/monthly_bar.png
new file mode 100644
index 0000000..19d45d2
Binary files /dev/null and b/vis/basislast/monthly_bar.png differ
diff --git a/vis/basislast/visualization.png b/vis/basislast/visualization.png
new file mode 100644
index 0000000..c4bdb97
Binary files /dev/null and b/vis/basislast/visualization.png differ
diff --git a/vis/basislast/visualization_month.png b/vis/basislast/visualization_month.png
new file mode 100644
index 0000000..ed05fbe
Binary files /dev/null and b/vis/basislast/visualization_month.png differ