diff --git a/demo_baseLoad.ipynb b/demo_baseLoad.ipynb index 35d5c00..8471c63 100644 --- a/demo_baseLoad.ipynb +++ b/demo_baseLoad.ipynb @@ -2,398 +2,88 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# imports\n", - "import polars as pl\n", - "import json\n", "import altair as alt\n", + "import polars as pl\n", "\n", "%load_ext autoreload\n", - "%autoreload 2\n", - "# %autoreload?" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# First some speedtests" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## test 1 reading in a newline delimited json to check efficiency\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7.42 μs ± 671 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "energy_use_df = pl.scan_ndjson(\n", - " \"data/PP/energy_use_test1.ndjson\",\n", - " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 2)
timestamptotal
datetime[μs, Europe/Brussels]f64
2023-01-01 00:00:00 CET0.025
2023-01-01 00:15:00 CET0.017
2023-01-01 00:30:00 CET0.023
2023-01-01 00:45:00 CET0.024
2023-01-01 01:00:00 CET0.023
" - ], - "text/plain": [ - "shape: (5, 2)\n", - "┌───────────────────────────────┬───────┐\n", - "│ timestamp ┆ total │\n", - "│ --- ┆ --- │\n", - "│ datetime[μs, Europe/Brussels] ┆ f64 │\n", - "╞═══════════════════════════════╪═══════╡\n", - "│ 2023-01-01 00:00:00 CET ┆ 0.025 │\n", - "│ 2023-01-01 00:15:00 CET ┆ 0.017 │\n", - "│ 2023-01-01 00:30:00 CET ┆ 0.023 │\n", - "│ 2023-01-01 00:45:00 CET ┆ 0.024 │\n", - "│ 2023-01-01 01:00:00 CET ┆ 0.023 │\n", - "└───────────────────────────────┴───────┘" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "energy_use_lf_1 = pl.scan_ndjson(\n", - " \"data/PP/energy_use_test1.ndjson\",\n", - " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n", - ")\n", - "energy_use_lf_1.collect().head()" + "%autoreload 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Test 2, reading in the \"smaller version of the json\" and tranforming it into polars." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "39.9 ms ± 5.04 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "# Read the JSON file\n", - "with open(\"data/PP/energy_use.json\", \"r\") as file:\n", - " data = json.load(file)\n", + "# Base Load analysis\n", + "\n", + "Demo of a base load analysis for a dossier. We define some KPIs we want to measure.\n", "\n", - "# Convert the data into a list of dictionaries\n", - "data_list = [{\"timestamp\": int(k), \"value\": v} for k, v in data.items()]\n", "\n", - "# Create a DataFrame from the list\n", - "df = pl.DataFrame(\n", - " data_list, schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"value\": pl.Float64}\n", - ")" + "\n", + "## loading in the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Base Load analysis\n", + "# Base Load Analysis Demo\n", "\n", - "## loading in the data" + "This notebook demonstrates how to analyze base load (standby power consumption) in energy usage data. Base load represents the minimum continuous power draw in a system, typically from devices that are always on or in standby mode.\n", + "\n", + "## Key Metrics\n", + "\n", + "We analyze three core metrics:\n", + "1. Base load value in watts (W) - Shows the consistent minimum power draw\n", + "2. Energy consumption in kWh - Quantifies energy used over time\n", + "3. Base load percentage - Shows what portion of total consumption is baseline\n", + "\n", + "## Data Format Requirements\n", + "\n", + "The analysis expects data in the following format:\n", + "- Timestamp (datetime with timezone 'Europe/Brussels')\n", + "- Total power (float, in kW)\n", + "\n", + "Example input data structure:\n", + "```json\n", + "{\n", + " \"timestamp\": \"2024-01-01T00:00:00+01:00\",\n", + " \"total\": 0.5\n", + "}\n```" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (12, 6)
timestamptotal_usagebase_load_kwhperiod_startbase_percentagebase_load_watts
datetime[μs]f64f64datetime[μs]f64f64
2023-12-01 00:00:00181.1254347.02023-12-31 23:00:002400.0181125.0
2024-01-01 00:00:00148349.2503042074.82024-01-01 00:00:001.39859186450.0
2024-02-01 00:00:00128940.8753541923.62024-02-01 00:00:001.49184780150.0
2024-03-01 00:00:00128139.3753941898.42024-03-01 00:00:001.48151279100.0
2024-04-01 00:00:00116175.5002481705.22024-04-01 00:00:001.46777971050.0
2024-07-01 00:00:00113613.5001881789.22024-07-01 00:00:001.57481374550.0
2024-08-01 00:00:00107086.8752861680.02024-08-01 00:00:001.5688270000.0
2024-09-01 00:00:00114579.5002481730.42024-09-01 00:00:001.51021872100.0
2024-10-01 00:00:00126714.8752341814.42024-10-01 00:00:001.43187675600.0
2024-11-01 00:00:0013360.3750282108.42024-11-01 00:00:0015.78099487850.0
" - ], - "text/plain": [ - "shape: (12, 6)\n", - "┌────────────────┬───────────────┬───────────────┬────────────────┬────────────────┬───────────────┐\n", - "│ timestamp ┆ total_usage ┆ base_load_kwh ┆ period_start ┆ base_percentag ┆ base_load_wat │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ e ┆ ts │\n", - "│ datetime[μs] ┆ f64 ┆ f64 ┆ datetime[μs] ┆ --- ┆ --- │\n", - "│ ┆ ┆ ┆ ┆ f64 ┆ f64 │\n", - "╞════════════════╪═══════════════╪═══════════════╪════════════════╪════════════════╪═══════════════╡\n", - "│ 2023-12-01 ┆ 181.125 ┆ 4347.0 ┆ 2023-12-31 ┆ 2400.0 ┆ 181125.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 23:00:00 ┆ ┆ │\n", - "│ 2024-01-01 ┆ 148349.250304 ┆ 2074.8 ┆ 2024-01-01 ┆ 1.398591 ┆ 86450.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-02-01 ┆ 128940.875354 ┆ 1923.6 ┆ 2024-02-01 ┆ 1.491847 ┆ 80150.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-03-01 ┆ 128139.375394 ┆ 1898.4 ┆ 2024-03-01 ┆ 1.481512 ┆ 79100.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-04-01 ┆ 116175.500248 ┆ 1705.2 ┆ 2024-04-01 ┆ 1.467779 ┆ 71050.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2024-07-01 ┆ 113613.500188 ┆ 1789.2 ┆ 2024-07-01 ┆ 1.574813 ┆ 74550.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-08-01 ┆ 107086.875286 ┆ 1680.0 ┆ 2024-08-01 ┆ 1.56882 ┆ 70000.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-09-01 ┆ 114579.500248 ┆ 1730.4 ┆ 2024-09-01 ┆ 1.510218 ┆ 72100.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-10-01 ┆ 126714.875234 ┆ 1814.4 ┆ 2024-10-01 ┆ 1.431876 ┆ 75600.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-11-01 ┆ 13360.375028 ┆ 2108.4 ┆ 2024-11-01 ┆ 15.780994 ┆ 87850.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "└────────────────┴───────────────┴───────────────┴────────────────┴────────────────┴───────────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (310, 6)
timestamptotal_usagebase_load_kwhperiod_startbase_percentagebase_load_watts
datetime[μs]f64f64datetime[μs]f64f64
2023-12-31 00:00:00181.1254347.02023-12-31 23:00:002400.0181125.0
2024-01-01 00:00:004403.8750582822.4001152024-01-01 00:00:0064.089014117600.0048
2024-01-02 00:00:004963.0000182570.4000582024-01-02 00:00:0051.791256107100.0024
2024-01-03 00:00:004891.2500182578.8000582024-01-03 00:00:0052.72272107450.0024
2024-01-04 00:00:004745.1250122419.22024-01-04 00:00:0050.982851100800.0
2024-10-31 00:00:004312.0000162158.82024-10-31 00:00:0050.06493589950.0
2024-11-01 00:00:002502.5000122175.62024-11-01 00:00:0086.93706390650.0
2024-11-02 00:00:002762.3752184.02024-11-02 00:00:0079.06240191000.0
2024-11-03 00:00:002585.6250222175.62024-11-03 00:00:0084.14213190650.0
2024-11-04 00:00:005509.8749942167.22024-11-04 00:00:0039.33301690300.0
" - ], - "text/plain": [ - "shape: (310, 6)\n", - "┌─────────────────┬─────────────┬───────────────┬────────────────┬────────────────┬────────────────┐\n", - "│ timestamp ┆ total_usage ┆ base_load_kwh ┆ period_start ┆ base_percentag ┆ base_load_watt │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ e ┆ s │\n", - "│ datetime[μs] ┆ f64 ┆ f64 ┆ datetime[μs] ┆ --- ┆ --- │\n", - "│ ┆ ┆ ┆ ┆ f64 ┆ f64 │\n", - "╞═════════════════╪═════════════╪═══════════════╪════════════════╪════════════════╪════════════════╡\n", - "│ 2023-12-31 ┆ 181.125 ┆ 4347.0 ┆ 2023-12-31 ┆ 2400.0 ┆ 181125.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 23:00:00 ┆ ┆ │\n", - "│ 2024-01-01 ┆ 4403.875058 ┆ 2822.400115 ┆ 2024-01-01 ┆ 64.089014 ┆ 117600.0048 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-01-02 ┆ 4963.000018 ┆ 2570.400058 ┆ 2024-01-02 ┆ 51.791256 ┆ 107100.0024 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-01-03 ┆ 4891.250018 ┆ 2578.800058 ┆ 2024-01-03 ┆ 52.72272 ┆ 107450.0024 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-01-04 ┆ 4745.125012 ┆ 2419.2 ┆ 2024-01-04 ┆ 50.982851 ┆ 100800.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2024-10-31 ┆ 4312.000016 ┆ 2158.8 ┆ 2024-10-31 ┆ 50.064935 ┆ 89950.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-11-01 ┆ 2502.500012 ┆ 2175.6 ┆ 2024-11-01 ┆ 86.937063 ┆ 90650.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-11-02 ┆ 2762.375 ┆ 2184.0 ┆ 2024-11-02 ┆ 79.062401 ┆ 91000.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-11-03 ┆ 2585.625022 ┆ 2175.6 ┆ 2024-11-03 ┆ 84.142131 ┆ 90650.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-11-04 ┆ 5509.874994 ┆ 2167.2 ┆ 2024-11-04 ┆ 39.333016 ┆ 90300.0 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "└─────────────────┴─────────────┴───────────────┴────────────────┴────────────────┴────────────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (7_416, 6)
timestamptotal_usagebase_load_kwhperiod_startbase_percentagebase_load_watts
datetime[μs]f64f64datetime[μs]f64f64
2023-12-31 23:00:00181.1254347.02023-12-31 23:00:002400.0181125.0
2024-01-01 00:00:00204.7500084914.0001922024-01-01 00:00:002400.0204750.008
2024-01-01 01:00:00182.04368.02024-01-01 01:00:002400.0182000.0
2024-01-01 02:00:00169.754074.02024-01-01 02:00:002400.0169750.0
2024-01-01 03:00:00162.753906.02024-01-01 03:00:002400.0162750.0
2024-11-04 18:00:00243.2499965837.9999042024-11-04 18:00:002400.0243249.996
2024-11-04 19:00:00208.254998.02024-11-04 19:00:002400.0208250.0
2024-11-04 20:00:00199.54788.02024-11-04 20:00:002400.0199500.0
2024-11-04 21:00:00170.6254095.02024-11-04 21:00:002400.0170625.0
2024-11-04 22:00:00124.2500022982.0000482024-11-04 22:00:002400.0124250.002
" - ], - "text/plain": [ - "shape: (7_416, 6)\n", - "┌─────────────────┬─────────────┬───────────────┬────────────────┬────────────────┬────────────────┐\n", - "│ timestamp ┆ total_usage ┆ base_load_kwh ┆ period_start ┆ base_percentag ┆ base_load_watt │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ e ┆ s │\n", - "│ datetime[μs] ┆ f64 ┆ f64 ┆ datetime[μs] ┆ --- ┆ --- │\n", - "│ ┆ ┆ ┆ ┆ f64 ┆ f64 │\n", - "╞═════════════════╪═════════════╪═══════════════╪════════════════╪════════════════╪════════════════╡\n", - "│ 2023-12-31 ┆ 181.125 ┆ 4347.0 ┆ 2023-12-31 ┆ 2400.0 ┆ 181125.0 │\n", - "│ 23:00:00 ┆ ┆ ┆ 23:00:00 ┆ ┆ │\n", - "│ 2024-01-01 ┆ 204.750008 ┆ 4914.000192 ┆ 2024-01-01 ┆ 2400.0 ┆ 204750.008 │\n", - "│ 00:00:00 ┆ ┆ ┆ 00:00:00 ┆ ┆ │\n", - "│ 2024-01-01 ┆ 182.0 ┆ 4368.0 ┆ 2024-01-01 ┆ 2400.0 ┆ 182000.0 │\n", - "│ 01:00:00 ┆ ┆ ┆ 01:00:00 ┆ ┆ │\n", - "│ 2024-01-01 ┆ 169.75 ┆ 4074.0 ┆ 2024-01-01 ┆ 2400.0 ┆ 169750.0 │\n", - "│ 02:00:00 ┆ ┆ ┆ 02:00:00 ┆ ┆ │\n", - "│ 2024-01-01 ┆ 162.75 ┆ 3906.0 ┆ 2024-01-01 ┆ 2400.0 ┆ 162750.0 │\n", - "│ 03:00:00 ┆ ┆ ┆ 03:00:00 ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2024-11-04 ┆ 243.249996 ┆ 5837.999904 ┆ 2024-11-04 ┆ 2400.0 ┆ 243249.996 │\n", - "│ 18:00:00 ┆ ┆ ┆ 18:00:00 ┆ ┆ │\n", - "│ 2024-11-04 ┆ 208.25 ┆ 4998.0 ┆ 2024-11-04 ┆ 2400.0 ┆ 208250.0 │\n", - "│ 19:00:00 ┆ ┆ ┆ 19:00:00 ┆ ┆ │\n", - "│ 2024-11-04 ┆ 199.5 ┆ 4788.0 ┆ 2024-11-04 ┆ 2400.0 ┆ 199500.0 │\n", - "│ 20:00:00 ┆ ┆ ┆ 20:00:00 ┆ ┆ │\n", - "│ 2024-11-04 ┆ 170.625 ┆ 4095.0 ┆ 2024-11-04 ┆ 2400.0 ┆ 170625.0 │\n", - "│ 21:00:00 ┆ ┆ ┆ 21:00:00 ┆ ┆ │\n", - "│ 2024-11-04 ┆ 124.250002 ┆ 2982.000048 ┆ 2024-11-04 ┆ 2400.0 ┆ 124250.002 │\n", - "│ 22:00:00 ┆ ┆ ┆ 22:00:00 ┆ ┆ │\n", - "└─────────────────┴─────────────┴───────────────┴────────────────┴────────────────┴────────────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "from openenergyid.baseload.main import main, TimeFrame\n", + "from openenergyid.baseload.main import main\n", 
+ "from openenergyid.enums import Granularity\n", + "\n", "\n", "# Monthly analysis\n", - "monthly_metrics = main(\"data/PP/energy_use_big.ndjson\", TimeFrame.MONTHLY)\n", + "monthly_metrics = main(\"data/PP/energy_use_big.ndjson\", Granularity.P1M)\n", "\n", "# Daily analysis\n", - "daily_metrics = main(\"data/PP/energy_use_big.ndjson\", TimeFrame.DAILY)\n", + "daily_metrics = main(\"data/PP/energy_use_big.ndjson\", Granularity.P1D)\n", "\n", "# Hourly analysis\n", - "hourly_metrics = main(\"data/PP/energy_use_big.ndjson\", TimeFrame.HOURLY)\n", - "# print the metrics\n", + "hourly_metrics = main(\"data/PP/energy_use_big.ndjson\", Granularity.PT1H)\n", + "\n", "display(monthly_metrics)\n", "display(daily_metrics)\n", - "display(hourly_metrics)\n", - "\n", - "\n", - "# metrics = main(\"data/PP/energy_use_big.ndjson\")\n", - "# display(metrics)\n", - "# print(f\"Base Load: {metrics.base_load_watts:.1f}W\")\n", - "# print(f\"Daily Usage: {metrics.daily_usage_kwh:.1f} kWh\")\n", - "# print(f\"Base Percentage: {metrics.base_percentage:.1f}%\")" + "display(hourly_metrics)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.VConcatChart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "def create_monthly_chart(df):\n", " \"\"\"Create bar chart for monthly data\"\"\"\n", @@ -483,76 +173,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (29_664, 2)
timestamptotal
datetime[μs, Europe/Brussels]f64
2024-01-11 09:15:00 CET119.874992
2024-01-11 09:45:00 CET119.000008
2024-01-11 10:30:00 CET117.25
2024-11-04 11:00:00 CET117.25
2024-01-11 10:15:00 CET116.375
2024-05-20 13:45:00 CEST15.749999
2024-06-09 13:00:00 CEST15.749999
2024-06-09 13:15:00 CEST15.749999
2024-06-09 14:15:00 CEST15.749999
2024-06-23 11:00:00 CEST15.749999
" - ], - "text/plain": [ - "shape: (29_664, 2)\n", - "┌───────────────────────────────┬────────────┐\n", - "│ timestamp ┆ total │\n", - "│ --- ┆ --- │\n", - "│ datetime[μs, Europe/Brussels] ┆ f64 │\n", - "╞═══════════════════════════════╪════════════╡\n", - "│ 2024-01-11 09:15:00 CET ┆ 119.874992 │\n", - "│ 2024-01-11 09:45:00 CET ┆ 119.000008 │\n", - "│ 2024-01-11 10:30:00 CET ┆ 117.25 │\n", - "│ 2024-11-04 11:00:00 CET ┆ 117.25 │\n", - "│ 2024-01-11 10:15:00 CET ┆ 116.375 │\n", - "│ … ┆ … │\n", - "│ 2024-05-20 13:45:00 CEST ┆ 15.749999 │\n", - "│ 2024-06-09 13:00:00 CEST ┆ 15.749999 │\n", - "│ 2024-06-09 13:15:00 CEST ┆ 15.749999 │\n", - "│ 2024-06-09 14:15:00 CEST ┆ 15.749999 │\n", - "│ 2024-06-23 11:00:00 CEST ┆ 15.749999 │\n", - "└───────────────────────────────┴────────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 2)
timestamptotal
datetime[μs, Europe/Brussels]f64
2024-01-01 00:00:00 CET51.625
2024-01-01 00:15:00 CET50.75
2024-01-01 00:30:00 CET38.5
2024-01-01 00:45:00 CET40.25
2024-01-01 01:00:00 CET59.500004
" - ], - "text/plain": [ - "shape: (5, 2)\n", - "┌───────────────────────────────┬───────────┐\n", - "│ timestamp ┆ total │\n", - "│ --- ┆ --- │\n", - "│ datetime[μs, Europe/Brussels] ┆ f64 │\n", - "╞═══════════════════════════════╪═══════════╡\n", - "│ 2024-01-01 00:00:00 CET ┆ 51.625 │\n", - "│ 2024-01-01 00:15:00 CET ┆ 50.75 │\n", - "│ 2024-01-01 00:30:00 CET ┆ 38.5 │\n", - "│ 2024-01-01 00:45:00 CET ┆ 40.25 │\n", - "│ 2024-01-01 01:00:00 CET ┆ 59.500004 │\n", - "└───────────────────────────────┴───────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "energy_use_lf_1 = pl.scan_ndjson(\n", " \"data/PP/energy_use_big.ndjson\",\n", @@ -569,95 +192,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_13152/3491075188.py:2: DeprecationWarning: `GroupBy.count` is deprecated. It has been renamed to `len`.\n", - " value_counts = tf.group_by(\"total\").count().sort(\"total\")\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Compute the value counts using Polars\n", "value_counts = tf.group_by(\"total\").count().sort(\"total\")\n", @@ -689,46 +226,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 5)
timestamptotal_daily_usagelowest_recordedmin_power_usage_per_daymax_power_usage_per_day
datetime[μs, Europe/Brussels]f64f64f64f64
2024-01-01 00:00:00 CET4462.50005428.0000022688.0001926468.0
2024-01-02 00:00:00 CET4943.75002224.52352.08400.0
2024-01-03 00:00:00 CET4912.25001625.3752436.08484.0
2024-01-04 00:00:00 CET4757.37501421.8752100.08736.0
2024-01-05 00:00:00 CET4779.2500122.752184.08316.0
" - ], - "text/plain": [ - "shape: (5, 5)\n", - "┌────────────────────┬───────────────────┬─────────────────┬───────────────────┬───────────────────┐\n", - "│ timestamp ┆ total_daily_usage ┆ lowest_recorded ┆ min_power_usage_p ┆ max_power_usage_p │\n", - "│ --- ┆ --- ┆ --- ┆ er_day ┆ er_day │\n", - "│ datetime[μs, ┆ f64 ┆ f64 ┆ --- ┆ --- │\n", - "│ Europe/Brussels] ┆ ┆ ┆ f64 ┆ f64 │\n", - "╞════════════════════╪═══════════════════╪═════════════════╪═══════════════════╪═══════════════════╡\n", - "│ 2024-01-01 ┆ 4462.500054 ┆ 28.000002 ┆ 2688.000192 ┆ 6468.0 │\n", - "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n", - "│ 2024-01-02 ┆ 4943.750022 ┆ 24.5 ┆ 2352.0 ┆ 8400.0 │\n", - "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n", - "│ 2024-01-03 ┆ 4912.250016 ┆ 25.375 ┆ 2436.0 ┆ 8484.0 │\n", - "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n", - "│ 2024-01-04 ┆ 4757.375014 ┆ 21.875 ┆ 2100.0 ┆ 8736.0 │\n", - "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n", - "│ 2024-01-05 ┆ 4779.25001 ┆ 22.75 ┆ 2184.0 ┆ 8316.0 │\n", - "│ 00:00:00 CET ┆ ┆ ┆ ┆ │\n", - "└────────────────────┴───────────────────┴─────────────────┴───────────────────┴───────────────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "lf = (\n", " energy_use_lf_1.filter(pl.col(\"total\") >= 0)\n", @@ -755,19 +255,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Average Basislast: 90671.5W\n", - "Average Daily Usage: 2176.1 kWh\n", - "Average Percentage: 58.3%\n" - ] - } - ], + "outputs": [], "source": [ "lf = (\n", " energy_use_lf_1.filter(pl.col(\"total\") >= 0)\n", @@ -807,87 +297,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "total_new = (\n", " alt.Chart(df)\n", @@ -924,87 +336,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "month_filter = \"month(datum.timestamp) == 2\" # Altair datetime function syntax\n", "\n", @@ -1042,88 +376,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Resample to monthly totals\n", "monthly_lf = (\n", @@ -1171,46 +426,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (11, 3)
timestamptotal_monthly_usagebasislast_monthly_kwh
datetime[μs, Europe/Brussels]f64f64
2024-01-01 00:00:00 CET148429.75030462244.0
2024-02-01 00:00:00 CET128935.62535257708.0
2024-03-01 00:00:00 CET128010.75039856952.0
2024-04-01 00:00:00 CEST116233.25024651156.0
2024-05-01 00:00:00 CEST118999.12546547879.999136
2024-07-01 00:00:00 CEST113646.7501953676.0
2024-08-01 00:00:00 CEST107087.75028650400.0
2024-09-01 00:00:00 CEST114583.00024852164.0
2024-10-01 00:00:00 CEST126777.87523454180.0
2024-11-01 00:00:00 CET13460.12502863252.0
" - ], - "text/plain": [ - "shape: (11, 3)\n", - "┌───────────────────────────────┬─────────────────────┬───────────────────────┐\n", - "│ timestamp ┆ total_monthly_usage ┆ basislast_monthly_kwh │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ datetime[μs, Europe/Brussels] ┆ f64 ┆ f64 │\n", - "╞═══════════════════════════════╪═════════════════════╪═══════════════════════╡\n", - "│ 2024-01-01 00:00:00 CET ┆ 148429.750304 ┆ 62244.0 │\n", - "│ 2024-02-01 00:00:00 CET ┆ 128935.625352 ┆ 57708.0 │\n", - "│ 2024-03-01 00:00:00 CET ┆ 128010.750398 ┆ 56952.0 │\n", - "│ 2024-04-01 00:00:00 CEST ┆ 116233.250246 ┆ 51156.0 │\n", - "│ 2024-05-01 00:00:00 CEST ┆ 118999.125465 ┆ 47879.999136 │\n", - "│ … ┆ … ┆ … │\n", - "│ 2024-07-01 00:00:00 CEST ┆ 113646.75019 ┆ 53676.0 │\n", - "│ 2024-08-01 00:00:00 CEST ┆ 107087.750286 ┆ 50400.0 │\n", - "│ 2024-09-01 00:00:00 CEST ┆ 114583.000248 ┆ 52164.0 │\n", - "│ 2024-10-01 00:00:00 CEST ┆ 126777.875234 ┆ 54180.0 │\n", - "│ 2024-11-01 00:00:00 CET ┆ 13460.125028 ┆ 63252.0 │\n", - "└───────────────────────────────┴─────────────────────┴───────────────────────┘" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(monthly_df)" ] @@ -1239,87 +457,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "alt.data_transformers.enable(\"vegafusion\")\n", "\n", @@ -1345,88 +485,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "alt.data_transformers.enable(\"vegafusion\")\n", "alt.Chart(tf).transform_density(\n", @@ -1440,88 +501,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "max = (\n", " alt.Chart(df_extended)\n", @@ -1555,88 +537,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# comparing the options\n", "max + lowest + lowest_new" @@ -1644,35 +547,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "DataFusion error: This feature is not implemented: Unsupported TRY_CAST from Float64 to Null\n Context[0]: Failed to get node value\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/IPython/core/formatters.py:977\u001b[0m, in \u001b[0;36mMimeBundleFormatter.__call__\u001b[0;34m(self, obj, include, exclude)\u001b[0m\n\u001b[1;32m 974\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n\u001b[1;32m 976\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 977\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 978\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 979\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File 
\u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/vegalite/v5/api.py:3417\u001b[0m, in \u001b[0;36mTopLevelMixin._repr_mimebundle_\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 3415\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3416\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m renderer \u001b[38;5;241m:=\u001b[39m renderers\u001b[38;5;241m.\u001b[39mget():\n\u001b[0;32m-> 3417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrenderer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdct\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/utils/display.py:225\u001b[0m, in \u001b[0;36mHTMLRenderer.__call__\u001b[0;34m(self, spec, **metadata)\u001b[0m\n\u001b[1;32m 223\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[1;32m 224\u001b[0m kwargs\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmetadata, output_div\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_div)\n\u001b[0;32m--> 225\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mspec_to_mimebundle\u001b[49m\u001b[43m(\u001b[49m\u001b[43mspec\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhtml\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/utils/mimebundle.py:122\u001b[0m, in \u001b[0;36mspec_to_mimebundle\u001b[0;34m(spec, format, mode, vega_version, 
vegaembed_version, vegalite_version, embed_options, engine, **kwargs)\u001b[0m\n\u001b[1;32m 120\u001b[0m internal_mode: Literal[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvega-lite\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvega\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m mode\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m using_vegafusion():\n\u001b[0;32m--> 122\u001b[0m spec \u001b[38;5;241m=\u001b[39m \u001b[43mcompile_with_vegafusion\u001b[49m\u001b[43m(\u001b[49m\u001b[43mspec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 123\u001b[0m internal_mode \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvega\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;66;03m# Default to the embed options set by alt.renderers.set_embed_options\u001b[39;00m\n", - "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/altair/utils/_vegafusion_data.py:250\u001b[0m, in \u001b[0;36mcompile_with_vegafusion\u001b[0;34m(vegalite_spec)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;66;03m# Pre-evaluate transforms in vega spec with vegafusion\u001b[39;00m\n\u001b[1;32m 249\u001b[0m row_limit \u001b[38;5;241m=\u001b[39m data_transformers\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_rows\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 250\u001b[0m transformed_vega_spec, warnings \u001b[38;5;241m=\u001b[39m \u001b[43mvf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mruntime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpre_transform_spec\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mvega_spec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mvf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_local_tz\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43minline_datasets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minline_tables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 254\u001b[0m \u001b[43m \u001b[49m\u001b[43mrow_limit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrow_limit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;66;03m# Check from row limit warning and convert to MaxRowsError\u001b[39;00m\n\u001b[1;32m 258\u001b[0m handle_row_limit_exceeded(row_limit, warnings)\n", - "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/vegafusion/runtime.py:371\u001b[0m, in \u001b[0;36mVegaFusionRuntime.pre_transform_spec\u001b[0;34m(self, spec, local_tz, default_input_tz, row_limit, preserve_interactivity, inline_datasets, keep_signals, keep_datasets, data_encoding_threshold, data_encoding_format)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_encoding_threshold \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 371\u001b[0m new_spec, warnings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membedded_runtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpre_transform_spec\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 372\u001b[0m \u001b[43m \u001b[49m\u001b[43mspec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 373\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_tz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_tz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdefault_input_tz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault_input_tz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 375\u001b[0m \u001b[43m \u001b[49m\u001b[43mrow_limit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrow_limit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 376\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreserve_interactivity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreserve_interactivity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[43m \u001b[49m\u001b[43minline_datasets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mimported_inline_dataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_signals\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_signals\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_datasets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_datasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 380\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 382\u001b[0m \u001b[38;5;66;03m# Use pre_transform_extract to extract large datasets\u001b[39;00m\n\u001b[1;32m 383\u001b[0m new_spec, datasets, warnings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedded_runtime\u001b[38;5;241m.\u001b[39mpre_transform_extract(\n\u001b[1;32m 384\u001b[0m spec,\n\u001b[1;32m 385\u001b[0m local_tz\u001b[38;5;241m=\u001b[39mlocal_tz,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 392\u001b[0m keep_datasets\u001b[38;5;241m=\u001b[39mkeep_datasets,\n\u001b[1;32m 393\u001b[0m )\n", - "\u001b[0;31mValueError\u001b[0m: DataFusion error: This feature is not implemented: Unsupported TRY_CAST from Float64 to Null\n Context[0]: Failed to get node value\n" - ] - }, - { - "data": { - "text/plain": [ - "alt.HConcatChart(...)" - ] - }, - "metadata": {}, - "output_type": 
"display_data" - } - ], + "outputs": [], "source": [ "# Assuming df is your DataFrame from the previous analysis\n", "\n", @@ -1749,87 +626,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Create a KDE plot for the 'total' column\n", "kde_chart = (\n", @@ -1846,639 +645,6 @@ "# Display the KDE chart\n", "kde_chart.display()" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test 2, testing the old pandas way" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "ename": "SystemExit", - "evalue": "Stopping the notebook execution here.", - "output_type": "error", - "traceback": [ - "An exception has occurred, use %tb to see the full traceback.\n", - "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m Stopping the notebook execution here.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/root/.cache/pypoetry/virtualenvs/openenergyid-Nm3FK_LY-py3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3585: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n", - " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n" - ] - } - ], - "source": [ - "raise SystemExit(\"Stopping the notebook execution here.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# show each unique value with how many times it occurs in that column\n", - "energy_use_lf_1[\"energy_use\"].value_counts()\n", - "# now plot that in a simple histogram but only the 100 most common values\n", - "# round the values to max 3 after the comma\n", - "energy_use_lf_1[\"energy_use\"].round(3).value_counts().head(40).plot(kind=\"bar\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\oscar\\AppData\\Local\\Temp\\ipykernel_3400\\3503598125.py:6: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n", - " energy_use_hourly = energy_use_series.resample('H').sum()\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import seaborn as sns\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# Resample the data to hourly intervals\n", - "energy_use_hourly = energy_use_series.resample(\"H\").sum() # noqa: F821\n", - "\n", - "# Reshape the data to a matrix with days as rows and hours as columns\n", - "energy_use_matrix = energy_use_hourly.values.reshape(-1, 24)\n", - "\n", - "# Create a dataframe with the reshaped data\n", - "energy_use_df_heatmap = pd.DataFrame(energy_use_matrix, columns=range(24))\n", - "\n", - "# Create a figure and axes for the heatmap\n", - "fig, ax = plt.subplots(figsize=(10, 6))\n", - "\n", - "# Create the heatmap using seaborn\n", - "sns.heatmap(energy_use_df_heatmap, cmap=\"YlGnBu\", ax=ax)\n", - "\n", - "# Set the labels and title\n", - "ax.set_xlabel(\"Hour of Day\")\n", - "ax.set_ylabel(\"Day of Month\")\n", - "ax.set_title(\"Energy Use Heatmap\")\n", - "\n", - "\n", - "# Set the y-axis limits to show only 1 month\n", - "ax.set_ylim(0, 30)\n", - "\n", - "# Show the heatmap\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
energy_use
2022-12-31 23:00:000.025
2022-12-31 23:15:000.017
2022-12-31 23:30:000.023
2022-12-31 23:45:000.024
2023-01-01 00:00:000.023
......
2023-12-31 21:45:000.024
2023-12-31 22:00:000.022
2023-12-31 22:15:000.046
2023-12-31 22:30:000.035
2023-12-31 22:45:000.027
\n", - "

35040 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " energy_use\n", - "2022-12-31 23:00:00 0.025\n", - "2022-12-31 23:15:00 0.017\n", - "2022-12-31 23:30:00 0.023\n", - "2022-12-31 23:45:00 0.024\n", - "2023-01-01 00:00:00 0.023\n", - "... ...\n", - "2023-12-31 21:45:00 0.024\n", - "2023-12-31 22:00:00 0.022\n", - "2023-12-31 22:15:00 0.046\n", - "2023-12-31 22:30:00 0.035\n", - "2023-12-31 22:45:00 0.027\n", - "\n", - "[35040 rows x 1 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\oscar\\AppData\\Local\\Temp\\ipykernel_3400\\784061356.py:16: FutureWarning: \n", - "\n", - "`shade` is now deprecated in favor of `fill`; setting `fill=True`.\n", - "This will become an error in seaborn v0.14.0; please update your code.\n", - "\n", - " sns.kdeplot(energy_use_series, shade=True)\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# Read in pandas series from a json file\n", - "energy_use_lf_1 = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n", - "energy_use_lf_1.columns = [\"energy_use\"]\n", - "energy_use_lf_1.Name = \"energy_use\"\n", - "display(energy_use_lf_1)\n", - "\n", - "# Convert DataFrame to Series\n", - "energy_use_series = energy_use_lf_1.squeeze()\n", - "\n", - "# Plot KDE to identify the most common usage levels\n", - "plt.figure(figsize=(10, 6))\n", - "sns.kdeplot(energy_use_series, shade=True)\n", - "plt.title(\"Kernel Density Estimation of Energy Usage\")\n", - "plt.xlabel(\"Energy Use\")\n", - "plt.ylabel(\"Density\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
energy_use
2022-12-31 23:00:000.025
2022-12-31 23:15:000.017
2022-12-31 23:30:000.023
2022-12-31 23:45:000.024
2023-01-01 00:00:000.023
......
2023-12-31 21:45:000.024
2023-12-31 22:00:000.022
2023-12-31 22:15:000.046
2023-12-31 22:30:000.035
2023-12-31 22:45:000.027
\n", - "

35040 rows × 1 columns

\n", - "
" - ], - "text/plain": [ - " energy_use\n", - "2022-12-31 23:00:00 0.025\n", - "2022-12-31 23:15:00 0.017\n", - "2022-12-31 23:30:00 0.023\n", - "2022-12-31 23:45:00 0.024\n", - "2023-01-01 00:00:00 0.023\n", - "... ...\n", - "2023-12-31 21:45:00 0.024\n", - "2023-12-31 22:00:00 0.022\n", - "2023-12-31 22:15:00 0.046\n", - "2023-12-31 22:30:00 0.035\n", - "2023-12-31 22:45:00 0.027\n", - "\n", - "[35040 rows x 1 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\oscar\\AppData\\Local\\Temp\\ipykernel_3400\\1600994407.py:21: FutureWarning: \n", - "\n", - "`shade` is now deprecated in favor of `fill`; setting `fill=True`.\n", - "This will become an error in seaborn v0.14.0; please update your code.\n", - "\n", - " sns.kdeplot(energy_use_series, shade=True)\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Read in pandas series from a json file\n", - "energy_use_lf_1 = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n", - "energy_use_lf_1.columns = [\"energy_use\"]\n", - "energy_use_lf_1.Name = \"energy_use\"\n", - "display(energy_use_lf_1)\n", - "\n", - "# Convert DataFrame to Series\n", - "energy_use_series = energy_use_lf_1.squeeze()\n", - "\n", - "# Calculate percentiles\n", - "percentiles = [1, 5, 10]\n", - "percentile_values = np.percentile(energy_use_series, percentiles)\n", - "\n", - "# Plot KDE to identify the most common usage levels\n", - "plt.figure(figsize=(10, 6))\n", - "sns.kdeplot(energy_use_series, shade=True)\n", - "\n", - "# Plot vertical lines for percentiles\n", - "for p, value in zip(percentiles, percentile_values):\n", - " plt.axvline(value, linestyle=\"--\", label=f\"{p}th Percentile: {value:.3f}\")\n", - "\n", - "plt.title(\"Kernel Density Estimation of Energy Usage with Percentiles\")\n", - "plt.xlabel(\"Energy Use\")\n", - "plt.ylabel(\"Density\")\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from scipy import stats\n", - "from scipy.fft import fft\n", - "from ruptures import Pelt\n", - "\n", - "# from ruptures.costs import GaussianChangesCost\n", - "from statsmodels.tsa.seasonal import STL\n", - "\n", - "# Load and preprocess the data\n", - "\n", - "data = pd.read_json(\"data/PP/energy_use.json\", orient=\"index\")\n", - "data.columns = [\"usage\"]\n", - "data.index.name = \"timestamp\"\n", - "data.index = pd.to_datetime(data.index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Percentile analysis:\n", - "5th percentile: 0.018\n", - "10th percentile: 0.021\n", - "25th percentile: 0.028\n", - "50th percentile: 0.048\n", - "75th percentile: 0.106\n", - "90th percentile: 0.282\n", - "95th percentile: 0.434\n" - ] - } - ], - "source": [ - "# 1. KDE with Percentile Analysis\n", - "plt.figure(figsize=(12, 6))\n", - "kde = stats.gaussian_kde(data[\"usage\"])\n", - "x_range = np.linspace(data[\"usage\"].min(), data[\"usage\"].max(), 1000)\n", - "plt.plot(x_range, kde(x_range), label=\"KDE\")\n", - "percentiles = [5, 10, 25, 50, 75, 90, 95]\n", - "for p in percentiles:\n", - " value = np.percentile(data[\"usage\"], p)\n", - " plt.axvline(value, color=\"r\", linestyle=\"--\", alpha=0.5)\n", - " plt.text(value, plt.ylim()[1], f\"{p}th\", rotation=90, va=\"top\")\n", - "plt.title(\"Energy Usage Distribution with Percentiles\")\n", - "plt.xlabel(\"Energy Usage\")\n", - "plt.ylabel(\"Density\")\n", - "plt.legend()\n", - "plt.show()\n", - "\n", - "print(\"Percentile analysis:\")\n", - "for p in percentiles:\n", - " print(f\"{p}th percentile: {np.percentile(data['usage'], p):.3f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 2. 
PELT Change Point Detection\n", - "model = Pelt(model=\"rbf\", jump=1).fit(data[\"usage\"].values)\n", - "change_points = model.predict(pen=10)\n", - "plt.figure(figsize=(12, 6))\n", - "plt.plot(data.index, data[\"usage\"])\n", - "for cp in change_points:\n", - " plt.axvline(data.index[cp], color=\"r\", linestyle=\"--\", alpha=0.5)\n", - "plt.title(\"Energy Usage with Change Points\")\n", - "plt.xlabel(\"Time\")\n", - "plt.ylabel(\"Energy Usage\")\n", - "plt.show()\n", - "\n", - "print(\"\\nDetected change points:\")\n", - "for cp in change_points:\n", - " print(f\"Change point at: {data.index[cp]}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dominant frequencies (cycles per hour):\n", - "0.0833 (period: 12.00 hours)\n", - "0.0417 (period: 24.00 hours)\n" - ] - } - ], - "source": [ - "# 3. Fast Fourier Transform (FFT)\n", - "fft_result = fft(data[\"usage\"].values)\n", - "frequencies = np.fft.fftfreq(len(data), d=0.25) # 0.25 hours between samples\n", - "plt.figure(figsize=(12, 6))\n", - "plt.plot(frequencies[: len(frequencies) // 2], np.abs(fft_result)[: len(frequencies) // 2])\n", - "plt.title(\"FFT of Energy Usage\")\n", - "plt.xlabel(\"Frequency (cycles per hour)\")\n", - "plt.ylabel(\"Magnitude\")\n", - "plt.xlim(0, 0.5) # Focus on lower frequencies\n", - "plt.show()\n", - "\n", - "print(\"\\nDominant frequencies (cycles per hour):\")\n", - "top_frequencies = frequencies[np.argsort(np.abs(fft_result))[-5:]]\n", - "for freq in top_frequencies:\n", - " if freq > 0:\n", - " print(f\"{freq:.4f} (period: {1/freq:.2f} hours)\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 4. Seasonal Decomposition\n", - "stl = STL(data[\"usage\"], period=96) # 96 quarters in a day\n", - "result = stl.fit()\n", - "fig = result.plot()\n", - "plt.suptitle(\"Seasonal Decomposition of Energy Usage\")\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Vacation Period Analysis (2023-08-11 to 2023-08-28):\n", - "Average usage during vacation: 0.025\n", - "Average usage during regular periods: 0.109\n", - "Standby usage estimate (5th percentile):\n", - " During vacation: 0.011\n", - " During regular periods: 0.018\n" - ] - } - ], - "source": [ - "# 5. Vacation Period Analysis\n", - "def analyze_vacation_period(start_date, end_date):\n", - " vacation_data = data.loc[start_date:end_date]\n", - " regular_data = data.drop(vacation_data.index)\n", - "\n", - " print(f\"\\nVacation Period Analysis ({start_date} to {end_date}):\")\n", - " print(f\"Average usage during vacation: {vacation_data['usage'].mean():.3f}\")\n", - " print(f\"Average usage during regular periods: {regular_data['usage'].mean():.3f}\")\n", - " print(\"Standby usage estimate (5th percentile):\")\n", - " print(f\" During vacation: {np.percentile(vacation_data['usage'], 5):.3f}\")\n", - " print(f\" During regular periods: {np.percentile(regular_data['usage'], 5):.3f}\")\n", - "\n", - "\n", - "# Example usage:\n", - "analyze_vacation_period(\"2023-08-11\", \"2023-08-28\")" - ] } ], "metadata": { diff --git a/openenergyid/baseload/__init__.py b/openenergyid/baseload/__init__.py index cbf7ffb..c13131d 100644 --- a/openenergyid/baseload/__init__.py +++ b/openenergyid/baseload/__init__.py @@ -5,6 +5,7 @@ EnergySchema, load_data, calculate_base_load, + Granularity, ) __all__ = [ @@ -12,4 +13,5 @@ "EnergySchema", "load_data", "calculate_base_load", + 
"Granularity", ] diff --git a/openenergyid/baseload/main.py b/openenergyid/baseload/main.py index fb2dcf1..e5bd804 100644 --- a/openenergyid/baseload/main.py +++ b/openenergyid/baseload/main.py @@ -10,26 +10,27 @@ load_data(path: str) -> pl.LazyFrame: Loads and validates energy usage data from an NDJSON file. - calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY) -> pl.DataFrame: - Calculates base load metrics from energy usage data aggregated by the specified timeframe. + calculate_base_load(lf: pl.LazyFrame, granularity: Granularity = Granularity.DAILY) -> pl.DataFrame: + Calculates base load metrics from energy usage data aggregated by the specified granularity. - main(file_path: str, timeframe: TimeFrame) -> pl.DataFrame: - Processes energy data and returns base load metrics for the specified timeframe. + main(file_path: str, granularity: Granularity) -> pl.DataFrame: + Processes energy data and returns base load metrics for the specified granularity. """ -from enum import Enum from typing import NamedTuple import polars as pl import pandera.polars as pa +from openenergyid.enums import Granularity ## VERY important to use pandera.polars instead of pandera to avoid pandas errors - -class TimeFrame(Enum): - HOURLY = "1h" - DAILY = "1d" - WEEKLY = "1w" - MONTHLY = "1mo" - YEARLY = "1y" +# Map Granularity to polars format +GRANULARITY_TO_POLARS = { + Granularity.PT15M: "15m", + Granularity.PT1H: "1h", + Granularity.P1D: "1d", + Granularity.P1M: "1mo", + Granularity.P1Y: "1y", +} class BaseLoadMetrics(NamedTuple): @@ -74,12 +75,15 @@ def load_data(path: str) -> pl.LazyFrame: return pl.LazyFrame(validated_df) -def calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY) -> pl.DataFrame: - """Calculate base load metrics aggregated by specified timeframe""" +def calculate_base_load( + lf: pl.LazyFrame, granularity: Granularity = Granularity.P1D +) -> pl.DataFrame: + """Calculate base load metrics aggregated by specified 
granularity""" + polars_interval = GRANULARITY_TO_POLARS[granularity] return ( lf.filter(pl.col("total") >= 0) .sort("timestamp") - .group_by_dynamic("timestamp", every=timeframe.value) + .group_by_dynamic("timestamp", every=polars_interval) .agg( [ pl.col("total").sum().alias("total_usage"), @@ -98,12 +102,12 @@ def calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY ) -def main(file_path: str, timeframe: TimeFrame) -> pl.DataFrame: - """Process energy data and return base load metrics for specified timeframe""" - return calculate_base_load(load_data(file_path), timeframe) +def main(file_path: str, granularity: Granularity) -> pl.DataFrame: + """Process energy data and return base load metrics for specified granularity""" + return calculate_base_load(load_data(file_path), granularity) # Example usage: if __name__ == "__main__": - results = main("data/energy_use.ndjson", TimeFrame.MONTHLY) + results = main("data/PP/energy_use_test1.ndjson", Granularity.P1M) print(results) diff --git a/performance_testing.ipynb b/performance_testing.ipynb new file mode 100644 index 0000000..fd9035e --- /dev/null +++ b/performance_testing.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# General Performance Testing\n", + "\n", + "In here we test and try some general things for the codebase.\n", + "Fe. the polars efficiency, we try to document and reference relevant docs where needed to keep it peer reviewed." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Some speedtests regarding polars reading in of files/frames/\n", + "\n", + "references:\n", + "* [pandasVSpolars speed test, apr 2023](https://medium.com/cuenex/pandas-2-0-vs-polars-the-ultimate-battle-a378eb75d6d1)\n", + "* [input/output in polars](https://docs.pola.rs/api/python/stable/reference/io.html)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## test 1 reading in a newline delimited json to check efficiency\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9.57 μs ± 218 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "energy_use_df = pl.scan_ndjson(\n", + " \"data/PP/energy_use_test1.ndjson\",\n", + " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 2)
timestamptotal
datetime[μs, Europe/Brussels]f64
2023-01-01 00:00:00 CET0.025
2023-01-01 00:15:00 CET0.017
2023-01-01 00:30:00 CET0.023
2023-01-01 00:45:00 CET0.024
2023-01-01 01:00:00 CET0.023
" + ], + "text/plain": [ + "shape: (5, 2)\n", + "┌───────────────────────────────┬───────┐\n", + "│ timestamp ┆ total │\n", + "│ --- ┆ --- │\n", + "│ datetime[μs, Europe/Brussels] ┆ f64 │\n", + "╞═══════════════════════════════╪═══════╡\n", + "│ 2023-01-01 00:00:00 CET ┆ 0.025 │\n", + "│ 2023-01-01 00:15:00 CET ┆ 0.017 │\n", + "│ 2023-01-01 00:30:00 CET ┆ 0.023 │\n", + "│ 2023-01-01 00:45:00 CET ┆ 0.024 │\n", + "│ 2023-01-01 01:00:00 CET ┆ 0.023 │\n", + "└───────────────────────────────┴───────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy_use_lf_1 = pl.scan_ndjson(\n", + " \"data/PP/energy_use_test1.ndjson\",\n", + " schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n", + ")\n", + "energy_use_lf_1.collect().head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test 2, reading in the \"smaller version of the json\" and tranforming it into polars." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "34.5 ms ± 1.31 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "# Read the JSON file\n", + "with open(\"data/PP/energy_use.json\", \"r\") as file:\n", + " data = json.load(file)\n", + "\n", + "# Convert the data into a list of dictionaries\n", + "data_list = [{\"timestamp\": int(k), \"value\": v} for k, v in data.items()]\n", + "\n", + "# Create a DataFrame from the list\n", + "df = pl.DataFrame(\n", + " data_list, schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"value\": pl.Float64}\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "openenergyid-Nm3FK_LY-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/poetry.lock b/poetry.lock index fe83275..1694528 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3481,6 +3481,21 @@ arro3-core = "*" narwhals = ">=1.13" packaging = "*" +[[package]] +name = "vl-convert-python" +version = "1.7.0" +description = "Convert Vega-Lite chart specifications to SVG, PNG, or Vega" +optional = false +python-versions = ">=3.7" +files = [ + {file = "vl_convert_python-1.7.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:90fba4356bd621bd31e72507a55e26dd13ebe79efa784715743116109afd0d47"}, + {file = "vl_convert_python-1.7.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:51f99c58b1d0d74126455ece7d41972740cb4430b8dfdf7e0908270eed5be32d"}, + {file = "vl_convert_python-1.7.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962100d7670b9d35f9bb9745cdf590412f62f57c134b4a142340ba93a4dbddba"}, + {file = "vl_convert_python-1.7.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8b50c492b640abb89a54a71e2c26f0f2d2c1cedc42030cc55bcc202670334724"}, + {file = "vl_convert_python-1.7.0-cp37-abi3-win_amd64.whl", hash = "sha256:285bbadb1ce8a922c87f6e75a9544fe10a652d37bd4c1519fb93f90bab381588"}, + {file = "vl_convert_python-1.7.0.tar.gz", hash = "sha256:bc9e1f8ca0d8d3b3789c66e37cd6a8cf0a83406427d5143133346c2b5004485b"}, +] + [[package]] name = "wcwidth" version = "0.2.13" @@ -3618,4 +3633,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "f6f3d28d2fdc940738627c986006bda504c2523d1b2ea1fb337dcb32cac9f54d" +content-hash = "e9bf1db7eeb34bbd25cb8c99a0c01db53785d16eaf33e9958540ab204e1dea91" diff --git a/pyproject.toml b/pyproject.toml index 953b5da..6e2c237 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,3 +69,4 @@ energyid = "^0.0.17" snakeviz = "^2.2.0" plotly = "^5.24.1" vegafusion = {version = ">=1.5.0", extras = ["embed"]} +vl-convert-python = "^1.7.0" diff --git a/vis/KDE of EnUsage.png b/vis/KDE of EnUsage.png new file mode 100644 index 0000000..ad9a1a4 Binary files /dev/null and b/vis/KDE of EnUsage.png differ diff --git a/vis/heatmap.png b/vis/heatmap.png new file mode 100644 index 0000000..2f36618 Binary files /dev/null and b/vis/heatmap.png differ