diff --git a/demo_baseLoad.ipynb b/demo_baseLoad.ipynb
index 35d5c00..8471c63 100644
--- a/demo_baseLoad.ipynb
+++ b/demo_baseLoad.ipynb
@@ -2,398 +2,88 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# imports\n",
- "import polars as pl\n",
- "import json\n",
"import altair as alt\n",
+ "import polars as pl\n",
"\n",
"%load_ext autoreload\n",
- "%autoreload 2\n",
- "# %autoreload?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# First some speedtests"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## test 1 reading in a newline delimited json to check efficiency\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "7.42 μs ± 671 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
- ]
- }
- ],
- "source": [
- "%%timeit\n",
- "energy_use_df = pl.scan_ndjson(\n",
- " \"data/PP/energy_use_test1.ndjson\",\n",
- " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "
shape: (5, 2) timestamp total datetime[μs, Europe/Brussels] f64 2023-01-01 00:00:00 CET 0.025 2023-01-01 00:15:00 CET 0.017 2023-01-01 00:30:00 CET 0.023 2023-01-01 00:45:00 CET 0.024 2023-01-01 01:00:00 CET 0.023
"
- ],
- "text/plain": [
- "shape: (5, 2)\n",
- "┌───────────────────────────────┬───────┐\n",
- "│ timestamp ┆ total │\n",
- "│ --- ┆ --- │\n",
- "│ datetime[μs, Europe/Brussels] ┆ f64 │\n",
- "╞═══════════════════════════════╪═══════╡\n",
- "│ 2023-01-01 00:00:00 CET ┆ 0.025 │\n",
- "│ 2023-01-01 00:15:00 CET ┆ 0.017 │\n",
- "│ 2023-01-01 00:30:00 CET ┆ 0.023 │\n",
- "│ 2023-01-01 00:45:00 CET ┆ 0.024 │\n",
- "│ 2023-01-01 01:00:00 CET ┆ 0.023 │\n",
- "└───────────────────────────────┴───────┘"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "energy_use_lf_1 = pl.scan_ndjson(\n",
- " \"data/PP/energy_use_test1.ndjson\",\n",
- " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
- ")\n",
- "energy_use_lf_1.collect().head()"
+ "%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Test 2, reading in the \"smaller version of the json\" and tranforming it into polars."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "39.9 ms ± 5.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
- ]
- }
- ],
- "source": [
- "%%timeit\n",
- "# Read the JSON file\n",
- "with open(\"data/PP/energy_use.json\", \"r\") as file:\n",
- " data = json.load(file)\n",
+ "# Base Load analysis\n",
+ "\n",
+ "Demo of a base load analysis for a dossier. We first define the KPIs we want to measure.\n",
"\n",
- "# Convert the data into a list of dictionaries\n",
- "data_list = [{\"timestamp\": int(k), \"value\": v} for k, v in data.items()]\n",
"\n",
- "# Create a DataFrame from the list\n",
- "df = pl.DataFrame(\n",
- " data_list, schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"value\": pl.Float64}\n",
- ")"
+ "\n",
+ "## Loading in the data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Base Load analysis\n",
+ "# Base Load Analysis Demo\n",
"\n",
- "## loading in the data"
+ "This notebook demonstrates how to analyze base load (standby power consumption) in energy usage data. Base load represents the minimum continuous power draw in a system, typically from devices that are always on or in standby mode.\n",
+ "\n",
+ "## Key Metrics\n",
+ "\n",
+ "We analyze three core metrics:\n",
+ "1. Base load value in watts (W) - Shows the consistent minimum power draw\n",
+ "2. Energy consumption in kWh - Quantifies the energy used over time\n",
+ "3. Base load percentage - Shows what portion of total consumption is base load\n",
+ "\n",
+ "## Data Format Requirements\n",
+ "\n",
+ "The analysis expects data in the following format:\n",
+ "- `timestamp`: datetime with timezone 'Europe/Brussels'\n",
+ "- `total`: total power (float, in kW)\n",
+ "\n",
+ "Example input data structure:\n",
+ "```json\n",
+ "{\n",
+ " \"timestamp\": \"2024-01-01T00:00:00+01:00\",\n",
+ " \"total\": 0.5\n",
+ "}\n```"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (12, 6) timestamp total_usage base_load_kwh period_start base_percentage base_load_watts datetime[μs] f64 f64 datetime[μs] f64 f64 2023-12-01 00:00:00 181.125 4347.0 2023-12-31 23:00:00 2400.0 181125.0 2024-01-01 00:00:00 148349.250304 2074.8 2024-01-01 00:00:00 1.398591 86450.0 2024-02-01 00:00:00 128940.875354 1923.6 2024-02-01 00:00:00 1.491847 80150.0 2024-03-01 00:00:00 128139.375394 1898.4 2024-03-01 00:00:00 1.481512 79100.0 2024-04-01 00:00:00 116175.500248 1705.2 2024-04-01 00:00:00 1.467779 71050.0 … … … … … … 2024-07-01 00:00:00 113613.500188 1789.2 2024-07-01 00:00:00 1.574813 74550.0 2024-08-01 00:00:00 107086.875286 1680.0 2024-08-01 00:00:00 1.56882 70000.0 2024-09-01 00:00:00 114579.500248 1730.4 2024-09-01 00:00:00 1.510218 72100.0 2024-10-01 00:00:00 126714.875234 1814.4 2024-10-01 00:00:00 1.431876 75600.0 2024-11-01 00:00:00 13360.375028 2108.4 2024-11-01 00:00:00 15.780994 87850.0
"
- ],
- "text/plain": [
- "shape: (12, 6)\n",
- "┌────────────────┬───────────────┬───────────────┬────────────────┬────────────────┬───────────────┐\n",
- "│ timestamp ┆ total_usage ┆ base_load_kwh ┆ period_start ┆ base_percentag ┆ base_load_wat │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ e ┆ ts │\n",
- "│ datetime[μs] ┆ f64 ┆ f64 ┆ datetime[μs] ┆ --- ┆ --- │\n",
- "│ ┆ ┆ ┆ ┆ f64 ┆ f64 │\n",
- "╞════════════════╪═══════════════╪═══════════════╪════════════════╪════════════════╪═══════════════╡\n",
- "│ 2023-12-01 ┆ 181.125 ┆ 4347.0 ┆ 2023-12-31 ┆ 2400.0 ┆ 181125.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 23:00:00 ┆ ┆ │\n",
- "│ 2024-01-01 ┆ 148349.250304 ┆ 2074.8 ┆ 2024-01-01 ┆ 1.398591 ┆ 86450.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-02-01 ┆ 128940.875354 ┆ 1923.6 ┆ 2024-02-01 ┆ 1.491847 ┆ 80150.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-03-01 ┆ 128139.375394 ┆ 1898.4 ┆ 2024-03-01 ┆ 1.481512 ┆ 79100.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-04-01 ┆ 116175.500248 ┆ 1705.2 ┆ 2024-04-01 ┆ 1.467779 ┆ 71050.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
- "│ 2024-07-01 ┆ 113613.500188 ┆ 1789.2 ┆ 2024-07-01 ┆ 1.574813 ┆ 74550.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-08-01 ┆ 107086.875286 ┆ 1680.0 ┆ 2024-08-01 ┆ 1.56882 ┆ 70000.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-09-01 ┆ 114579.500248 ┆ 1730.4 ┆ 2024-09-01 ┆ 1.510218 ┆ 72100.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-10-01 ┆ 126714.875234 ┆ 1814.4 ┆ 2024-10-01 ┆ 1.431876 ┆ 75600.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-11-01 ┆ 13360.375028 ┆ 2108.4 ┆ 2024-11-01 ┆ 15.780994 ┆ 87850.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "└────────────────┴───────────────┴───────────────┴────────────────┴────────────────┴───────────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (310, 6) timestamp total_usage base_load_kwh period_start base_percentage base_load_watts datetime[μs] f64 f64 datetime[μs] f64 f64 2023-12-31 00:00:00 181.125 4347.0 2023-12-31 23:00:00 2400.0 181125.0 2024-01-01 00:00:00 4403.875058 2822.400115 2024-01-01 00:00:00 64.089014 117600.0048 2024-01-02 00:00:00 4963.000018 2570.400058 2024-01-02 00:00:00 51.791256 107100.0024 2024-01-03 00:00:00 4891.250018 2578.800058 2024-01-03 00:00:00 52.72272 107450.0024 2024-01-04 00:00:00 4745.125012 2419.2 2024-01-04 00:00:00 50.982851 100800.0 … … … … … … 2024-10-31 00:00:00 4312.000016 2158.8 2024-10-31 00:00:00 50.064935 89950.0 2024-11-01 00:00:00 2502.500012 2175.6 2024-11-01 00:00:00 86.937063 90650.0 2024-11-02 00:00:00 2762.375 2184.0 2024-11-02 00:00:00 79.062401 91000.0 2024-11-03 00:00:00 2585.625022 2175.6 2024-11-03 00:00:00 84.142131 90650.0 2024-11-04 00:00:00 5509.874994 2167.2 2024-11-04 00:00:00 39.333016 90300.0
"
- ],
- "text/plain": [
- "shape: (310, 6)\n",
- "┌─────────────────┬─────────────┬───────────────┬────────────────┬────────────────┬────────────────┐\n",
- "│ timestamp ┆ total_usage ┆ base_load_kwh ┆ period_start ┆ base_percentag ┆ base_load_watt │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ e ┆ s │\n",
- "│ datetime[μs] ┆ f64 ┆ f64 ┆ datetime[μs] ┆ --- ┆ --- │\n",
- "│ ┆ ┆ ┆ ┆ f64 ┆ f64 │\n",
- "╞═════════════════╪═════════════╪═══════════════╪════════════════╪════════════════╪════════════════╡\n",
- "│ 2023-12-31 ┆ 181.125 ┆ 4347.0 ┆ 2023-12-31 ┆ 2400.0 ┆ 181125.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 23:00:00 ┆ ┆ │\n",
- "│ 2024-01-01 ┆ 4403.875058 ┆ 2822.400115 ┆ 2024-01-01 ┆ 64.089014 ┆ 117600.0048 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-01-02 ┆ 4963.000018 ┆ 2570.400058 ┆ 2024-01-02 ┆ 51.791256 ┆ 107100.0024 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-01-03 ┆ 4891.250018 ┆ 2578.800058 ┆ 2024-01-03 ┆ 52.72272 ┆ 107450.0024 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-01-04 ┆ 4745.125012 ┆ 2419.2 ┆ 2024-01-04 ┆ 50.982851 ┆ 100800.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
- "│ 2024-10-31 ┆ 4312.000016 ┆ 2158.8 ┆ 2024-10-31 ┆ 50.064935 ┆ 89950.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-11-01 ┆ 2502.500012 ┆ 2175.6 ┆ 2024-11-01 ┆ 86.937063 ┆ 90650.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-11-02 ┆ 2762.375 ┆ 2184.0 ┆ 2024-11-02 ┆ 79.062401 ┆ 91000.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-11-03 ┆ 2585.625022 ┆ 2175.6 ┆ 2024-11-03 ┆ 84.142131 ┆ 90650.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-11-04 ┆ 5509.874994 ┆ 2167.2 ┆ 2024-11-04 ┆ 39.333016 ┆ 90300.0 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "└─────────────────┴─────────────┴───────────────┴────────────────┴────────────────┴────────────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (7_416, 6) timestamp total_usage base_load_kwh period_start base_percentage base_load_watts datetime[μs] f64 f64 datetime[μs] f64 f64 2023-12-31 23:00:00 181.125 4347.0 2023-12-31 23:00:00 2400.0 181125.0 2024-01-01 00:00:00 204.750008 4914.000192 2024-01-01 00:00:00 2400.0 204750.008 2024-01-01 01:00:00 182.0 4368.0 2024-01-01 01:00:00 2400.0 182000.0 2024-01-01 02:00:00 169.75 4074.0 2024-01-01 02:00:00 2400.0 169750.0 2024-01-01 03:00:00 162.75 3906.0 2024-01-01 03:00:00 2400.0 162750.0 … … … … … … 2024-11-04 18:00:00 243.249996 5837.999904 2024-11-04 18:00:00 2400.0 243249.996 2024-11-04 19:00:00 208.25 4998.0 2024-11-04 19:00:00 2400.0 208250.0 2024-11-04 20:00:00 199.5 4788.0 2024-11-04 20:00:00 2400.0 199500.0 2024-11-04 21:00:00 170.625 4095.0 2024-11-04 21:00:00 2400.0 170625.0 2024-11-04 22:00:00 124.250002 2982.000048 2024-11-04 22:00:00 2400.0 124250.002
"
- ],
- "text/plain": [
- "shape: (7_416, 6)\n",
- "┌─────────────────┬─────────────┬───────────────┬────────────────┬────────────────┬────────────────┐\n",
- "│ timestamp ┆ total_usage ┆ base_load_kwh ┆ period_start ┆ base_percentag ┆ base_load_watt │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ e ┆ s │\n",
- "│ datetime[μs] ┆ f64 ┆ f64 ┆ datetime[μs] ┆ --- ┆ --- │\n",
- "│ ┆ ┆ ┆ ┆ f64 ┆ f64 │\n",
- "╞═════════════════╪═════════════╪═══════════════╪════════════════╪════════════════╪════════════════╡\n",
- "│ 2023-12-31 ┆ 181.125 ┆ 4347.0 ┆ 2023-12-31 ┆ 2400.0 ┆ 181125.0 │\n",
- "│ 23:00:00 ┆ ┆ ┆ 23:00:00 ┆ ┆ │\n",
- "│ 2024-01-01 ┆ 204.750008 ┆ 4914.000192 ┆ 2024-01-01 ┆ 2400.0 ┆ 204750.008 │\n",
- "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n",
- "│ 2024-01-01 ┆ 182.0 ┆ 4368.0 ┆ 2024-01-01 ┆ 2400.0 ┆ 182000.0 │\n",
- "│ 01:00:00 ┆ ┆ ┆ 01:00:00 ┆ ┆ │\n",
- "│ 2024-01-01 ┆ 169.75 ┆ 4074.0 ┆ 2024-01-01 ┆ 2400.0 ┆ 169750.0 │\n",
- "│ 02:00:00 ┆ ┆ ┆ 02:00:00 ┆ ┆ │\n",
- "│ 2024-01-01 ┆ 162.75 ┆ 3906.0 ┆ 2024-01-01 ┆ 2400.0 ┆ 162750.0 │\n",
- "│ 03:00:00 ┆ ┆ ┆ 03:00:00 ┆ ┆ │\n",
- "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
- "│ 2024-11-04 ┆ 243.249996 ┆ 5837.999904 ┆ 2024-11-04 ┆ 2400.0 ┆ 243249.996 │\n",
- "│ 18:00:00 ┆ ┆ ┆ 18:00:00 ┆ ┆ │\n",
- "│ 2024-11-04 ┆ 208.25 ┆ 4998.0 ┆ 2024-11-04 ┆ 2400.0 ┆ 208250.0 │\n",
- "│ 19:00:00 ┆ ┆ ┆ 19:00:00 ┆ ┆ │\n",
- "│ 2024-11-04 ┆ 199.5 ┆ 4788.0 ┆ 2024-11-04 ┆ 2400.0 ┆ 199500.0 │\n",
- "│ 20:00:00 ┆ ┆ ┆ 20:00:00 ┆ ┆ │\n",
- "│ 2024-11-04 ┆ 170.625 ┆ 4095.0 ┆ 2024-11-04 ┆ 2400.0 ┆ 170625.0 │\n",
- "│ 21:00:00 ┆ ┆ ┆ 21:00:00 ┆ ┆ │\n",
- "│ 2024-11-04 ┆ 124.250002 ┆ 2982.000048 ┆ 2024-11-04 ┆ 2400.0 ┆ 124250.002 │\n",
- "│ 22:00:00 ┆ ┆ ┆ 22:00:00 ┆ ┆ │\n",
- "└─────────────────┴─────────────┴───────────────┴────────────────┴────────────────┴────────────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
- "from openenergyid.baseload.main import main, TimeFrame\n",
+ "from openenergyid.baseload.main import main\n",
+ "from openenergyid.enums import Granularity\n",
+ "\n",
"\n",
"# Monthly analysis\n",
- "monthly_metrics = main(\"data/PP/energy_use_big.ndjson\", TimeFrame.MONTHLY)\n",
+ "monthly_metrics = main(\"data/PP/energy_use_big.ndjson\", Granularity.P1M)\n",
"\n",
"# Daily analysis\n",
- "daily_metrics = main(\"data/PP/energy_use_big.ndjson\", TimeFrame.DAILY)\n",
+ "daily_metrics = main(\"data/PP/energy_use_big.ndjson\", Granularity.P1D)\n",
"\n",
"# Hourly analysis\n",
- "hourly_metrics = main(\"data/PP/energy_use_big.ndjson\", TimeFrame.HOURLY)\n",
- "# print the metrics\n",
+ "hourly_metrics = main(\"data/PP/energy_use_big.ndjson\", Granularity.PT1H)\n",
+ "\n",
"display(monthly_metrics)\n",
"display(daily_metrics)\n",
- "display(hourly_metrics)\n",
- "\n",
- "\n",
- "# metrics = main(\"data/PP/energy_use_big.ndjson\")\n",
- "# display(metrics)\n",
- "# print(f\"Base Load: {metrics.base_load_watts:.1f}W\")\n",
- "# print(f\"Daily Usage: {metrics.daily_usage_kwh:.1f} kWh\")\n",
- "# print(f\"Base Percentage: {metrics.base_percentage:.1f}%\")"
+ "display(hourly_metrics)"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.VConcatChart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"def create_monthly_chart(df):\n",
" \"\"\"Create bar chart for monthly data\"\"\"\n",
@@ -483,76 +173,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (29_664, 2) timestamp total datetime[μs, Europe/Brussels] f64 2024-01-11 09:15:00 CET 119.874992 2024-01-11 09:45:00 CET 119.000008 2024-01-11 10:30:00 CET 117.25 2024-11-04 11:00:00 CET 117.25 2024-01-11 10:15:00 CET 116.375 … … 2024-05-20 13:45:00 CEST 15.749999 2024-06-09 13:00:00 CEST 15.749999 2024-06-09 13:15:00 CEST 15.749999 2024-06-09 14:15:00 CEST 15.749999 2024-06-23 11:00:00 CEST 15.749999
"
- ],
- "text/plain": [
- "shape: (29_664, 2)\n",
- "┌───────────────────────────────┬────────────┐\n",
- "│ timestamp ┆ total │\n",
- "│ --- ┆ --- │\n",
- "│ datetime[μs, Europe/Brussels] ┆ f64 │\n",
- "╞═══════════════════════════════╪════════════╡\n",
- "│ 2024-01-11 09:15:00 CET ┆ 119.874992 │\n",
- "│ 2024-01-11 09:45:00 CET ┆ 119.000008 │\n",
- "│ 2024-01-11 10:30:00 CET ┆ 117.25 │\n",
- "│ 2024-11-04 11:00:00 CET ┆ 117.25 │\n",
- "│ 2024-01-11 10:15:00 CET ┆ 116.375 │\n",
- "│ … ┆ … │\n",
- "│ 2024-05-20 13:45:00 CEST ┆ 15.749999 │\n",
- "│ 2024-06-09 13:00:00 CEST ┆ 15.749999 │\n",
- "│ 2024-06-09 13:15:00 CEST ┆ 15.749999 │\n",
- "│ 2024-06-09 14:15:00 CEST ┆ 15.749999 │\n",
- "│ 2024-06-23 11:00:00 CEST ┆ 15.749999 │\n",
- "└───────────────────────────────┴────────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (5, 2) timestamp total datetime[μs, Europe/Brussels] f64 2024-01-01 00:00:00 CET 51.625 2024-01-01 00:15:00 CET 50.75 2024-01-01 00:30:00 CET 38.5 2024-01-01 00:45:00 CET 40.25 2024-01-01 01:00:00 CET 59.500004
"
- ],
- "text/plain": [
- "shape: (5, 2)\n",
- "┌───────────────────────────────┬───────────┐\n",
- "│ timestamp ┆ total │\n",
- "│ --- ┆ --- │\n",
- "│ datetime[μs, Europe/Brussels] ┆ f64 │\n",
- "╞═══════════════════════════════╪═══════════╡\n",
- "│ 2024-01-01 00:00:00 CET ┆ 51.625 │\n",
- "│ 2024-01-01 00:15:00 CET ┆ 50.75 │\n",
- "│ 2024-01-01 00:30:00 CET ┆ 38.5 │\n",
- "│ 2024-01-01 00:45:00 CET ┆ 40.25 │\n",
- "│ 2024-01-01 01:00:00 CET ┆ 59.500004 │\n",
- "└───────────────────────────────┴───────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"energy_use_lf_1 = pl.scan_ndjson(\n",
" \"data/PP/energy_use_big.ndjson\",\n",
@@ -569,95 +192,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_13152/3491075188.py:2: DeprecationWarning: `GroupBy.count` is deprecated. It has been renamed to `len`.\n",
- " value_counts = tf.group_by(\"total\").count().sort(\"total\")\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.Chart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Compute the value counts using Polars\n",
"value_counts = tf.group_by(\"total\").count().sort(\"total\")\n",
@@ -689,46 +226,9 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (5, 5) timestamp total_daily_usage lowest_recorded min_power_usage_per_day max_power_usage_per_day datetime[μs, Europe/Brussels] f64 f64 f64 f64 2024-01-01 00:00:00 CET 4462.500054 28.000002 2688.000192 6468.0 2024-01-02 00:00:00 CET 4943.750022 24.5 2352.0 8400.0 2024-01-03 00:00:00 CET 4912.250016 25.375 2436.0 8484.0 2024-01-04 00:00:00 CET 4757.375014 21.875 2100.0 8736.0 2024-01-05 00:00:00 CET 4779.25001 22.75 2184.0 8316.0
"
- ],
- "text/plain": [
- "shape: (5, 5)\n",
- "┌────────────────────┬───────────────────┬─────────────────┬───────────────────┬───────────────────┐\n",
- "│ timestamp ┆ total_daily_usage ┆ lowest_recorded ┆ min_power_usage_p ┆ max_power_usage_p │\n",
- "│ --- ┆ --- ┆ --- ┆ er_day ┆ er_day │\n",
- "│ datetime[μs, ┆ f64 ┆ f64 ┆ --- ┆ --- │\n",
- "│ Europe/Brussels] ┆ ┆ ┆ f64 ┆ f64 │\n",
- "╞════════════════════╪═══════════════════╪═════════════════╪═══════════════════╪═══════════════════╡\n",
- "│ 2024-01-01 ┆ 4462.500054 ┆ 28.000002 ┆ 2688.000192 ┆ 6468.0 │\n",
- "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n",
- "│ 2024-01-02 ┆ 4943.750022 ┆ 24.5 ┆ 2352.0 ┆ 8400.0 │\n",
- "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n",
- "│ 2024-01-03 ┆ 4912.250016 ┆ 25.375 ┆ 2436.0 ┆ 8484.0 │\n",
- "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n",
- "│ 2024-01-04 ┆ 4757.375014 ┆ 21.875 ┆ 2100.0 ┆ 8736.0 │\n",
- "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n",
- "│ 2024-01-05 ┆ 4779.25001 ┆ 22.75 ┆ 2184.0 ┆ 8316.0 │\n",
- "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n",
- "└────────────────────┴───────────────────┴─────────────────┴───────────────────┴───────────────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"lf = (\n",
" energy_use_lf_1.filter(pl.col(\"total\") >= 0)\n",
@@ -755,19 +255,9 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Average Basislast: 90671.5W\n",
- "Average Daily Usage: 2176.1 kWh\n",
- "Average Percentage: 58.3%\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"lf = (\n",
" energy_use_lf_1.filter(pl.col(\"total\") >= 0)\n",
@@ -807,87 +297,9 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.LayerChart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"total_new = (\n",
" alt.Chart(df)\n",
@@ -924,87 +336,9 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.LayerChart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"month_filter = \"month(datum.timestamp) == 2\" # Altair datetime function syntax\n",
"\n",
@@ -1042,88 +376,9 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.LayerChart(...)"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Resample to monthly totals\n",
"monthly_lf = (\n",
@@ -1171,46 +426,9 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
shape: (11, 3) timestamp total_monthly_usage basislast_monthly_kwh datetime[μs, Europe/Brussels] f64 f64 2024-01-01 00:00:00 CET 148429.750304 62244.0 2024-02-01 00:00:00 CET 128935.625352 57708.0 2024-03-01 00:00:00 CET 128010.750398 56952.0 2024-04-01 00:00:00 CEST 116233.250246 51156.0 2024-05-01 00:00:00 CEST 118999.125465 47879.999136 … … … 2024-07-01 00:00:00 CEST 113646.75019 53676.0 2024-08-01 00:00:00 CEST 107087.750286 50400.0 2024-09-01 00:00:00 CEST 114583.000248 52164.0 2024-10-01 00:00:00 CEST 126777.875234 54180.0 2024-11-01 00:00:00 CET 13460.125028 63252.0
"
- ],
- "text/plain": [
- "shape: (11, 3)\n",
- "┌───────────────────────────────┬─────────────────────┬───────────────────────┐\n",
- "│ timestamp ┆ total_monthly_usage ┆ basislast_monthly_kwh │\n",
- "│ --- ┆ --- ┆ --- │\n",
- "│ datetime[μs, Europe/Brussels] ┆ f64 ┆ f64 │\n",
- "╞═══════════════════════════════╪═════════════════════╪═══════════════════════╡\n",
- "│ 2024-01-01 00:00:00 CET ┆ 148429.750304 ┆ 62244.0 │\n",
- "│ 2024-02-01 00:00:00 CET ┆ 128935.625352 ┆ 57708.0 │\n",
- "│ 2024-03-01 00:00:00 CET ┆ 128010.750398 ┆ 56952.0 │\n",
- "│ 2024-04-01 00:00:00 CEST ┆ 116233.250246 ┆ 51156.0 │\n",
- "│ 2024-05-01 00:00:00 CEST ┆ 118999.125465 ┆ 47879.999136 │\n",
- "│ … ┆ … ┆ … │\n",
- "│ 2024-07-01 00:00:00 CEST ┆ 113646.75019 ┆ 53676.0 │\n",
- "│ 2024-08-01 00:00:00 CEST ┆ 107087.750286 ┆ 50400.0 │\n",
- "│ 2024-09-01 00:00:00 CEST ┆ 114583.000248 ┆ 52164.0 │\n",
- "│ 2024-10-01 00:00:00 CEST ┆ 126777.875234 ┆ 54180.0 │\n",
- "│ 2024-11-01 00:00:00 CET ┆ 13460.125028 ┆ 63252.0 │\n",
- "└───────────────────────────────┴─────────────────────┴───────────────────────┘"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"display(monthly_df)"
]
@@ -1239,87 +457,9 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.LayerChart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"alt.data_transformers.enable(\"vegafusion\")\n",
"\n",
@@ -1345,88 +485,9 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.Chart(...)"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"alt.data_transformers.enable(\"vegafusion\")\n",
"alt.Chart(tf).transform_density(\n",
@@ -1440,88 +501,9 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.LayerChart(...)"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"max = (\n",
" alt.Chart(df_extended)\n",
@@ -1555,88 +537,9 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.LayerChart(...)"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# comparing the options\n",
"max + lowest + lowest_new"
@@ -1644,35 +547,9 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "ename": "ValueError",
- "evalue": "DataFusion error: This feature is not implemented: Unsupported TRY_CAST from Float64 to Null\n Context[0]: Failed to get node value\n",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/IPython/core/formatters.py:977\u001b[0m, in \u001b[0;36mMimeBundleFormatter.__call__\u001b[0;34m(self, obj, include, exclude)\u001b[0m\n\u001b[1;32m 974\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n\u001b[1;32m 976\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 977\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 978\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 979\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
- "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/vegalite/v5/api.py:3417\u001b[0m, in \u001b[0;36mTopLevelMixin._repr_mimebundle_\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 3415\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3416\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m renderer \u001b[38;5;241m:=\u001b[39m renderers\u001b[38;5;241m.\u001b[39mget():\n\u001b[0;32m-> 3417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrenderer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdct\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/utils/display.py:225\u001b[0m, in \u001b[0;36mHTMLRenderer.__call__\u001b[0;34m(self, spec, **metadata)\u001b[0m\n\u001b[1;32m 223\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[1;32m 224\u001b[0m kwargs\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmetadata, output_div\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_div)\n\u001b[0;32m--> 225\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mspec_to_mimebundle\u001b[49m\u001b[43m(\u001b[49m\u001b[43mspec\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhtml\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/utils/mimebundle.py:122\u001b[0m, in \u001b[0;36mspec_to_mimebundle\u001b[0;34m(spec, format, mode, vega_version, vegaembed_version, vegalite_version, embed_options, engine, **kwargs)\u001b[0m\n\u001b[1;32m 120\u001b[0m internal_mode: Literal[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvega-lite\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvega\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m mode\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m using_vegafusion():\n\u001b[0;32m--> 122\u001b[0m spec \u001b[38;5;241m=\u001b[39m \u001b[43mcompile_with_vegafusion\u001b[49m\u001b[43m(\u001b[49m\u001b[43mspec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 123\u001b[0m internal_mode \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvega\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;66;03m# Default to the embed options set by alt.renderers.set_embed_options\u001b[39;00m\n",
- "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/utils/_vegafusion_data.py:250\u001b[0m, in \u001b[0;36mcompile_with_vegafusion\u001b[0;34m(vegalite_spec)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;66;03m# Pre-evaluate transforms in vega spec with vegafusion\u001b[39;00m\n\u001b[1;32m 249\u001b[0m row_limit \u001b[38;5;241m=\u001b[39m data_transformers\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_rows\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 250\u001b[0m transformed_vega_spec, warnings \u001b[38;5;241m=\u001b[39m \u001b[43mvf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mruntime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpre_transform_spec\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mvega_spec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mvf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_local_tz\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43minline_datasets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minline_tables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 254\u001b[0m \u001b[43m \u001b[49m\u001b[43mrow_limit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrow_limit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;66;03m# Check from row limit warning and convert to MaxRowsError\u001b[39;00m\n\u001b[1;32m 258\u001b[0m handle_row_limit_exceeded(row_limit, warnings)\n",
- "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/vegafusion/runtime.py:371\u001b[0m, in \u001b[0;36mVegaFusionRuntime.pre_transform_spec\u001b[0;34m(self, spec, local_tz, default_input_tz, row_limit, preserve_interactivity, inline_datasets, keep_signals, keep_datasets, data_encoding_threshold, data_encoding_format)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_encoding_threshold \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 371\u001b[0m new_spec, warnings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membedded_runtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpre_transform_spec\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 372\u001b[0m \u001b[43m \u001b[49m\u001b[43mspec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 373\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_tz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_tz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[43m \u001b[49m\u001b[43mdefault_input_tz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault_input_tz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 375\u001b[0m \u001b[43m \u001b[49m\u001b[43mrow_limit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrow_limit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 376\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreserve_interactivity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreserve_interactivity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[43m \u001b[49m\u001b[43minline_datasets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mimported_inline_dataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkeep_signals\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_signals\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_datasets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_datasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 380\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 382\u001b[0m \u001b[38;5;66;03m# Use pre_transform_extract to extract large datasets\u001b[39;00m\n\u001b[1;32m 383\u001b[0m new_spec, datasets, warnings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedded_runtime\u001b[38;5;241m.\u001b[39mpre_transform_extract(\n\u001b[1;32m 384\u001b[0m spec,\n\u001b[1;32m 385\u001b[0m local_tz\u001b[38;5;241m=\u001b[39mlocal_tz,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 392\u001b[0m keep_datasets\u001b[38;5;241m=\u001b[39mkeep_datasets,\n\u001b[1;32m 393\u001b[0m )\n",
- "\u001b[0;31mValueError\u001b[0m: DataFusion error: This feature is not implemented: Unsupported TRY_CAST from Float64 to Null\n Context[0]: Failed to get node value\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "alt.HConcatChart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Assuming df is your DataFrame from the previous analysis\n",
"\n",
@@ -1749,87 +626,9 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- ""
- ],
- "text/plain": [
- "alt.Chart(...)"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Create a KDE plot for the 'total' column\n",
"kde_chart = (\n",
@@ -1846,639 +645,6 @@
"# Display the KDE chart\n",
"kde_chart.display()"
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Test 2, testing the old pandas way"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "ename": "SystemExit",
- "evalue": "Stopping the notebook execution here.",
- "output_type": "error",
- "traceback": [
- "An exception has occurred, use %tb to see the full traceback.\n",
- "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m Stopping the notebook execution here.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/root/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3585: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
- " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
- ]
- }
- ],
- "source": [
- "raise SystemExit(\"Stopping the notebook execution here.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# show each unique value with how many times it occurs in that column\n",
- "energy_use_lf_1[\"energy_use\"].value_counts()\n",
- "# now plot that in a simple histogram but only the 100 most common values\n",
- "# round the values to max 3 after the comma\n",
- "energy_use_lf_1[\"energy_use\"].round(3).value_counts().head(40).plot(kind=\"bar\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\oscar\\AppData\\Local\\Temp\\ipykernel_3400\\3503598125.py:6: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
- " energy_use_hourly = energy_use_series.resample('H').sum()\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "import seaborn as sns\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "# Resample the data to hourly intervals\n",
- "energy_use_hourly = energy_use_series.resample(\"H\").sum() # noqa: F821\n",
- "\n",
- "# Reshape the data to a matrix with days as rows and hours as columns\n",
- "energy_use_matrix = energy_use_hourly.values.reshape(-1, 24)\n",
- "\n",
- "# Create a dataframe with the reshaped data\n",
- "energy_use_df_heatmap = pd.DataFrame(energy_use_matrix, columns=range(24))\n",
- "\n",
- "# Create a figure and axes for the heatmap\n",
- "fig, ax = plt.subplots(figsize=(10, 6))\n",
- "\n",
- "# Create the heatmap using seaborn\n",
- "sns.heatmap(energy_use_df_heatmap, cmap=\"YlGnBu\", ax=ax)\n",
- "\n",
- "# Set the labels and title\n",
- "ax.set_xlabel(\"Hour of Day\")\n",
- "ax.set_ylabel(\"Day of Month\")\n",
- "ax.set_title(\"Energy Use Heatmap\")\n",
- "\n",
- "\n",
- "# Set the y-axis limits to show only 1 month\n",
- "ax.set_ylim(0, 30)\n",
- "\n",
- "# Show the heatmap\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " energy_use \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 2022-12-31 23:00:00 \n",
- " 0.025 \n",
- " \n",
- " \n",
- " 2022-12-31 23:15:00 \n",
- " 0.017 \n",
- " \n",
- " \n",
- " 2022-12-31 23:30:00 \n",
- " 0.023 \n",
- " \n",
- " \n",
- " 2022-12-31 23:45:00 \n",
- " 0.024 \n",
- " \n",
- " \n",
- " 2023-01-01 00:00:00 \n",
- " 0.023 \n",
- " \n",
- " \n",
- " ... \n",
- " ... \n",
- " \n",
- " \n",
- " 2023-12-31 21:45:00 \n",
- " 0.024 \n",
- " \n",
- " \n",
- " 2023-12-31 22:00:00 \n",
- " 0.022 \n",
- " \n",
- " \n",
- " 2023-12-31 22:15:00 \n",
- " 0.046 \n",
- " \n",
- " \n",
- " 2023-12-31 22:30:00 \n",
- " 0.035 \n",
- " \n",
- " \n",
- " 2023-12-31 22:45:00 \n",
- " 0.027 \n",
- " \n",
- " \n",
- "
\n",
- "
35040 rows × 1 columns
\n",
- "
"
- ],
- "text/plain": [
- " energy_use\n",
- "2022-12-31 23:00:00 0.025\n",
- "2022-12-31 23:15:00 0.017\n",
- "2022-12-31 23:30:00 0.023\n",
- "2022-12-31 23:45:00 0.024\n",
- "2023-01-01 00:00:00 0.023\n",
- "... ...\n",
- "2023-12-31 21:45:00 0.024\n",
- "2023-12-31 22:00:00 0.022\n",
- "2023-12-31 22:15:00 0.046\n",
- "2023-12-31 22:30:00 0.035\n",
- "2023-12-31 22:45:00 0.027\n",
- "\n",
- "[35040 rows x 1 columns]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\oscar\\AppData\\Local\\Temp\\ipykernel_3400\\784061356.py:16: FutureWarning: \n",
- "\n",
- "`shade` is now deprecated in favor of `fill`; setting `fill=True`.\n",
- "This will become an error in seaborn v0.14.0; please update your code.\n",
- "\n",
- " sns.kdeplot(energy_use_series, shade=True)\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "import pandas as pd\n",
- "import seaborn as sns\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "# Read in pandas series from a json file\n",
- "energy_use_lf_1 = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n",
- "energy_use_lf_1.columns = [\"energy_use\"]\n",
- "energy_use_lf_1.Name = \"energy_use\"\n",
- "display(energy_use_lf_1)\n",
- "\n",
- "# Convert DataFrame to Series\n",
- "energy_use_series = energy_use_lf_1.squeeze()\n",
- "\n",
- "# Plot KDE to identify the most common usage levels\n",
- "plt.figure(figsize=(10, 6))\n",
- "sns.kdeplot(energy_use_series, shade=True)\n",
- "plt.title(\"Kernel Density Estimation of Energy Usage\")\n",
- "plt.xlabel(\"Energy Use\")\n",
- "plt.ylabel(\"Density\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " energy_use \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 2022-12-31 23:00:00 \n",
- " 0.025 \n",
- " \n",
- " \n",
- " 2022-12-31 23:15:00 \n",
- " 0.017 \n",
- " \n",
- " \n",
- " 2022-12-31 23:30:00 \n",
- " 0.023 \n",
- " \n",
- " \n",
- " 2022-12-31 23:45:00 \n",
- " 0.024 \n",
- " \n",
- " \n",
- " 2023-01-01 00:00:00 \n",
- " 0.023 \n",
- " \n",
- " \n",
- " ... \n",
- " ... \n",
- " \n",
- " \n",
- " 2023-12-31 21:45:00 \n",
- " 0.024 \n",
- " \n",
- " \n",
- " 2023-12-31 22:00:00 \n",
- " 0.022 \n",
- " \n",
- " \n",
- " 2023-12-31 22:15:00 \n",
- " 0.046 \n",
- " \n",
- " \n",
- " 2023-12-31 22:30:00 \n",
- " 0.035 \n",
- " \n",
- " \n",
- " 2023-12-31 22:45:00 \n",
- " 0.027 \n",
- " \n",
- " \n",
- "
\n",
- "
35040 rows × 1 columns
\n",
- "
"
- ],
- "text/plain": [
- " energy_use\n",
- "2022-12-31 23:00:00 0.025\n",
- "2022-12-31 23:15:00 0.017\n",
- "2022-12-31 23:30:00 0.023\n",
- "2022-12-31 23:45:00 0.024\n",
- "2023-01-01 00:00:00 0.023\n",
- "... ...\n",
- "2023-12-31 21:45:00 0.024\n",
- "2023-12-31 22:00:00 0.022\n",
- "2023-12-31 22:15:00 0.046\n",
- "2023-12-31 22:30:00 0.035\n",
- "2023-12-31 22:45:00 0.027\n",
- "\n",
- "[35040 rows x 1 columns]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\oscar\\AppData\\Local\\Temp\\ipykernel_3400\\1600994407.py:21: FutureWarning: \n",
- "\n",
- "`shade` is now deprecated in favor of `fill`; setting `fill=True`.\n",
- "This will become an error in seaborn v0.14.0; please update your code.\n",
- "\n",
- " sns.kdeplot(energy_use_series, shade=True)\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "import pandas as pd\n",
- "import seaborn as sns\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "\n",
- "# Read in pandas series from a json file\n",
- "energy_use_lf_1 = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n",
- "energy_use_lf_1.columns = [\"energy_use\"]\n",
- "energy_use_lf_1.Name = \"energy_use\"\n",
- "display(energy_use_lf_1)\n",
- "\n",
- "# Convert DataFrame to Series\n",
- "energy_use_series = energy_use_lf_1.squeeze()\n",
- "\n",
- "# Calculate percentiles\n",
- "percentiles = [1, 5, 10]\n",
- "percentile_values = np.percentile(energy_use_series, percentiles)\n",
- "\n",
- "# Plot KDE to identify the most common usage levels\n",
- "plt.figure(figsize=(10, 6))\n",
- "sns.kdeplot(energy_use_series, shade=True)\n",
- "\n",
- "# Plot vertical lines for percentiles\n",
- "for p, value in zip(percentiles, percentile_values):\n",
- " plt.axvline(value, linestyle=\"--\", label=f\"{p}th Percentile: {value:.3f}\")\n",
- "\n",
- "plt.title(\"Kernel Density Estimation of Energy Usage with Percentiles\")\n",
- "plt.xlabel(\"Energy Use\")\n",
- "plt.ylabel(\"Density\")\n",
- "plt.legend()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from scipy import stats\n",
- "from scipy.fft import fft\n",
- "from ruptures import Pelt\n",
- "\n",
- "# from ruptures.costs import GaussianChangesCost\n",
- "from statsmodels.tsa.seasonal import STL\n",
- "\n",
- "# Load and preprocess the data\n",
- "\n",
- "data = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n",
- "data.columns = [\"usage\"]\n",
- "data.index.name = \"timestamp\"\n",
- "data.index = pd.to_datetime(data.index)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Percentile analysis:\n",
- "5th percentile: 0.018\n",
- "10th percentile: 0.021\n",
- "25th percentile: 0.028\n",
- "50th percentile: 0.048\n",
- "75th percentile: 0.106\n",
- "90th percentile: 0.282\n",
- "95th percentile: 0.434\n"
- ]
- }
- ],
- "source": [
- "# 1. KDE with Percentile Analysis\n",
- "plt.figure(figsize=(12, 6))\n",
- "kde = stats.gaussian_kde(data[\"usage\"])\n",
- "x_range = np.linspace(data[\"usage\"].min(), data[\"usage\"].max(), 1000)\n",
- "plt.plot(x_range, kde(x_range), label=\"KDE\")\n",
- "percentiles = [5, 10, 25, 50, 75, 90, 95]\n",
- "for p in percentiles:\n",
- " value = np.percentile(data[\"usage\"], p)\n",
- " plt.axvline(value, color=\"r\", linestyle=\"--\", alpha=0.5)\n",
- " plt.text(value, plt.ylim()[1], f\"{p}th\", rotation=90, va=\"top\")\n",
- "plt.title(\"Energy Usage Distribution with Percentiles\")\n",
- "plt.xlabel(\"Energy Usage\")\n",
- "plt.ylabel(\"Density\")\n",
- "plt.legend()\n",
- "plt.show()\n",
- "\n",
- "print(\"Percentile analysis:\")\n",
- "for p in percentiles:\n",
- " print(f\"{p}th percentile: {np.percentile(data['usage'], p):.3f}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 2. PELT Change Point Detection\n",
- "model = Pelt(model=\"rbf\", jump=1).fit(data[\"usage\"].values)\n",
- "change_points = model.predict(pen=10)\n",
- "plt.figure(figsize=(12, 6))\n",
- "plt.plot(data.index, data[\"usage\"])\n",
- "for cp in change_points:\n",
- " plt.axvline(data.index[cp], color=\"r\", linestyle=\"--\", alpha=0.5)\n",
- "plt.title(\"Energy Usage with Change Points\")\n",
- "plt.xlabel(\"Time\")\n",
- "plt.ylabel(\"Energy Usage\")\n",
- "plt.show()\n",
- "\n",
- "print(\"\\nDetected change points:\")\n",
- "for cp in change_points:\n",
- " print(f\"Change point at: {data.index[cp]}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Dominant frequencies (cycles per hour):\n",
- "0.0833 (period: 12.00 hours)\n",
- "0.0417 (period: 24.00 hours)\n"
- ]
- }
- ],
- "source": [
- "# 3. Fast Fourier Transform (FFT)\n",
- "fft_result = fft(data[\"usage\"].values)\n",
- "frequencies = np.fft.fftfreq(len(data), d=0.25) # 0.25 hours between samples\n",
- "plt.figure(figsize=(12, 6))\n",
- "plt.plot(frequencies[: len(frequencies) // 2], np.abs(fft_result)[: len(frequencies) // 2])\n",
- "plt.title(\"FFT of Energy Usage\")\n",
- "plt.xlabel(\"Frequency (cycles per hour)\")\n",
- "plt.ylabel(\"Magnitude\")\n",
- "plt.xlim(0, 0.5) # Focus on lower frequencies\n",
- "plt.show()\n",
- "\n",
- "print(\"\\nDominant frequencies (cycles per hour):\")\n",
- "top_frequencies = frequencies[np.argsort(np.abs(fft_result))[-5:]]\n",
- "for freq in top_frequencies:\n",
- " if freq > 0:\n",
- " print(f\"{freq:.4f} (period: {1/freq:.2f} hours)\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# 4. Seasonal Decomposition\n",
- "stl = STL(data[\"usage\"], period=96) # 96 quarters in a day\n",
- "result = stl.fit()\n",
- "fig = result.plot()\n",
- "plt.suptitle(\"Seasonal Decomposition of Energy Usage\")\n",
- "plt.tight_layout()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Vacation Period Analysis (2023-08-11 to 2023-08-28):\n",
- "Average usage during vacation: 0.025\n",
- "Average usage during regular periods: 0.109\n",
- "Standby usage estimate (5th percentile):\n",
- " During vacation: 0.011\n",
- " During regular periods: 0.018\n"
- ]
- }
- ],
- "source": [
- "# 5. Vacation Period Analysis\n",
- "def analyze_vacation_period(start_date, end_date):\n",
- " vacation_data = data.loc[start_date:end_date]\n",
- " regular_data = data.drop(vacation_data.index)\n",
- "\n",
- " print(f\"\\nVacation Period Analysis ({start_date} to {end_date}):\")\n",
- " print(f\"Average usage during vacation: {vacation_data['usage'].mean():.3f}\")\n",
- " print(f\"Average usage during regular periods: {regular_data['usage'].mean():.3f}\")\n",
- " print(\"Standby usage estimate (5th percentile):\")\n",
- " print(f\" During vacation: {np.percentile(vacation_data['usage'], 5):.3f}\")\n",
- " print(f\" During regular periods: {np.percentile(regular_data['usage'], 5):.3f}\")\n",
- "\n",
- "\n",
- "# Example usage:\n",
- "analyze_vacation_period(\"2023-08-11\", \"2023-08-28\")"
- ]
}
],
"metadata": {
diff --git a/openenergyid/baseload/__init__.py b/openenergyid/baseload/__init__.py
index cbf7ffb..c13131d 100644
--- a/openenergyid/baseload/__init__.py
+++ b/openenergyid/baseload/__init__.py
@@ -5,6 +5,7 @@
EnergySchema,
load_data,
calculate_base_load,
+ Granularity,
)
__all__ = [
@@ -12,4 +13,5 @@
"EnergySchema",
"load_data",
"calculate_base_load",
+ "Granularity",
]
diff --git a/openenergyid/baseload/main.py b/openenergyid/baseload/main.py
index fb2dcf1..e5bd804 100644
--- a/openenergyid/baseload/main.py
+++ b/openenergyid/baseload/main.py
@@ -10,26 +10,27 @@
load_data(path: str) -> pl.LazyFrame:
Loads and validates energy usage data from an NDJSON file.
- calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY) -> pl.DataFrame:
- Calculates base load metrics from energy usage data aggregated by the specified timeframe.
+ calculate_base_load(lf: pl.LazyFrame, granularity: Granularity = Granularity.P1D) -> pl.DataFrame:
+ Calculates base load metrics from energy usage data aggregated by the specified granularity.
- main(file_path: str, timeframe: TimeFrame) -> pl.DataFrame:
- Processes energy data and returns base load metrics for the specified timeframe.
+ main(file_path: str, granularity: Granularity) -> pl.DataFrame:
+ Processes energy data and returns base load metrics for the specified granularity.
"""
-from enum import Enum
from typing import NamedTuple
import polars as pl
import pandera.polars as pa
+from openenergyid.enums import Granularity
## VERY important to use pandera.polars instead of pandera to avoid pandas errors
-
-class TimeFrame(Enum):
- HOURLY = "1h"
- DAILY = "1d"
- WEEKLY = "1w"
- MONTHLY = "1mo"
- YEARLY = "1y"
+# Map each supported Granularity to the polars interval string passed to group_by_dynamic(every=...)
+GRANULARITY_TO_POLARS = {
+ Granularity.PT15M: "15m",
+ Granularity.PT1H: "1h",
+ Granularity.P1D: "1d",
+ Granularity.P1M: "1mo",
+ Granularity.P1Y: "1y",
+}
class BaseLoadMetrics(NamedTuple):
@@ -74,12 +75,15 @@ def load_data(path: str) -> pl.LazyFrame:
return pl.LazyFrame(validated_df)
-def calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY) -> pl.DataFrame:
- """Calculate base load metrics aggregated by specified timeframe"""
+def calculate_base_load(
+ lf: pl.LazyFrame, granularity: Granularity = Granularity.P1D
+) -> pl.DataFrame:
+ """Calculate base load metrics aggregated by specified granularity"""
+ polars_interval = GRANULARITY_TO_POLARS[granularity]
return (
lf.filter(pl.col("total") >= 0)
.sort("timestamp")
- .group_by_dynamic("timestamp", every=timeframe.value)
+ .group_by_dynamic("timestamp", every=polars_interval)
.agg(
[
pl.col("total").sum().alias("total_usage"),
@@ -98,12 +102,12 @@ def calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY
)
-def main(file_path: str, timeframe: TimeFrame) -> pl.DataFrame:
- """Process energy data and return base load metrics for specified timeframe"""
- return calculate_base_load(load_data(file_path), timeframe)
+def main(file_path: str, granularity: Granularity) -> pl.DataFrame:
+ """Process energy data and return base load metrics for specified granularity"""
+ return calculate_base_load(load_data(file_path), granularity)
# Example usage:
if __name__ == "__main__":
- results = main("data/energy_use.ndjson", TimeFrame.MONTHLY)
+ results = main("data/PP/energy_use_test1.ndjson", Granularity.P1M)
print(results)
diff --git a/performance_testing.ipynb b/performance_testing.ipynb
new file mode 100644
index 0000000..fd9035e
--- /dev/null
+++ b/performance_testing.ipynb
@@ -0,0 +1,164 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import polars as pl\n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# General Performance Testing\n",
+ "\n",
+ "In this notebook we test and try out some general things for the codebase,\n",
+ "e.g. the efficiency of polars. We document and reference relevant docs where needed so the results can be peer reviewed."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Some speed tests for reading files/frames into polars\n",
+ "\n",
+ "references:\n",
+ "* [pandas vs. polars speed test, Apr 2023](https://medium.com/cuenex/pandas-2-0-vs-polars-the-ultimate-battle-a378eb75d6d1)\n",
+ "* [input/output in polars](https://docs.pola.rs/api/python/stable/reference/io.html)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Test 1: reading in a newline-delimited JSON file to check efficiency\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "9.57 μs ± 218 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "energy_use_df = pl.scan_ndjson(\n",
+ " \"data/PP/energy_use_test1.ndjson\",\n",
+ " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 2) timestamp total datetime[μs, Europe/Brussels] f64 2023-01-01 00:00:00 CET 0.025 2023-01-01 00:15:00 CET 0.017 2023-01-01 00:30:00 CET 0.023 2023-01-01 00:45:00 CET 0.024 2023-01-01 01:00:00 CET 0.023
"
+ ],
+ "text/plain": [
+ "shape: (5, 2)\n",
+ "┌───────────────────────────────┬───────┐\n",
+ "│ timestamp ┆ total │\n",
+ "│ --- ┆ --- │\n",
+ "│ datetime[μs, Europe/Brussels] ┆ f64 │\n",
+ "╞═══════════════════════════════╪═══════╡\n",
+ "│ 2023-01-01 00:00:00 CET ┆ 0.025 │\n",
+ "│ 2023-01-01 00:15:00 CET ┆ 0.017 │\n",
+ "│ 2023-01-01 00:30:00 CET ┆ 0.023 │\n",
+ "│ 2023-01-01 00:45:00 CET ┆ 0.024 │\n",
+ "│ 2023-01-01 01:00:00 CET ┆ 0.023 │\n",
+ "└───────────────────────────────┴───────┘"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "energy_use_lf_1 = pl.scan_ndjson(\n",
+ " \"data/PP/energy_use_test1.ndjson\",\n",
+ " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
+ ")\n",
+ "energy_use_lf_1.collect().head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Test 2: reading in the \"smaller version of the json\" and transforming it into polars."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "34.5 ms ± 1.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "# Read the JSON file\n",
+ "with open(\"data/PP/energy_use.json\", \"r\") as file:\n",
+ " data = json.load(file)\n",
+ "\n",
+ "# Convert the data into a list of dictionaries\n",
+ "data_list = [{\"timestamp\": int(k), \"value\": v} for k, v in data.items()]\n",
+ "\n",
+ "# Create a DataFrame from the list\n",
+ "df = pl.DataFrame(\n",
+ " data_list, schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"value\": pl.Float64}\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "openenergyid-Nm3FK_LY-py3.11",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/poetry.lock b/poetry.lock
index fe83275..1694528 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3481,6 +3481,21 @@ arro3-core = "*"
narwhals = ">=1.13"
packaging = "*"
+[[package]]
+name = "vl-convert-python"
+version = "1.7.0"
+description = "Convert Vega-Lite chart specifications to SVG, PNG, or Vega"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "vl_convert_python-1.7.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:90fba4356bd621bd31e72507a55e26dd13ebe79efa784715743116109afd0d47"},
+ {file = "vl_convert_python-1.7.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:51f99c58b1d0d74126455ece7d41972740cb4430b8dfdf7e0908270eed5be32d"},
+ {file = "vl_convert_python-1.7.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962100d7670b9d35f9bb9745cdf590412f62f57c134b4a142340ba93a4dbddba"},
+ {file = "vl_convert_python-1.7.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b50c492b640abb89a54a71e2c26f0f2d2c1cedc42030cc55bcc202670334724"},
+ {file = "vl_convert_python-1.7.0-cp37-abi3-win_amd64.whl", hash = "sha256:285bbadb1ce8a922c87f6e75a9544fe10a652d37bd4c1519fb93f90bab381588"},
+ {file = "vl_convert_python-1.7.0.tar.gz", hash = "sha256:bc9e1f8ca0d8d3b3789c66e37cd6a8cf0a83406427d5143133346c2b5004485b"},
+]
+
[[package]]
name = "wcwidth"
version = "0.2.13"
@@ -3618,4 +3633,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "f6f3d28d2fdc940738627c986006bda504c2523d1b2ea1fb337dcb32cac9f54d"
+content-hash = "e9bf1db7eeb34bbd25cb8c99a0c01db53785d16eaf33e9958540ab204e1dea91"
diff --git a/pyproject.toml b/pyproject.toml
index 953b5da..6e2c237 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,3 +69,4 @@ energyid = "^0.0.17"
snakeviz = "^2.2.0"
plotly = "^5.24.1"
vegafusion = {version = ">=1.5.0", extras = ["embed"]}
+vl-convert-python = "^1.7.0"
diff --git a/vis/KDE of EnUsage.png b/vis/KDE of EnUsage.png
new file mode 100644
index 0000000..ad9a1a4
Binary files /dev/null and b/vis/KDE of EnUsage.png differ
diff --git a/vis/heatmap.png b/vis/heatmap.png
new file mode 100644
index 0000000..2f36618
Binary files /dev/null and b/vis/heatmap.png differ