added capacity analysis with demo nb

EnergieID · Jul 19, 2024 · f9fbfac · f9fbfac
1 parent db75f3a
commit f9fbfac
Show file tree

Hide file tree

Showing 10 changed files with 28,111 additions and 0 deletions.
diff --git a/data/capacity/elec_sample.json b/data/capacity/elec_sample.json
diff --git a/data/capacity/elec_sample2.json b/data/capacity/elec_sample2.json
diff --git a/data/capacity/electricity_delivered_sample.json b/data/capacity/electricity_delivered_sample.json
diff --git a/data/capacity/electricity_delivered_sample2.json b/data/capacity/electricity_delivered_sample2.json
diff --git a/data/capacity/electricity_delivered_series_json_sample.json b/data/capacity/electricity_delivered_series_json_sample.json
diff --git a/demo_capacity_analysis.ipynb b/demo_capacity_analysis.ipynb
diff --git a/openenergyid/capacity/PowerAnalysis.py b/openenergyid/capacity/PowerAnalysis.py
@@ -0,0 +1,34 @@
+import datetime
+
+from openenergyid.capacity.models import CapacityInput
+
+
+class PowerPeakAnalysis:
+    """Hold the settings and input series for a power peak analysis.
+
+    The class currently only stores its configuration; it is based on the
+    following parameters:
+    - min_peak_value: The minimum value of a peak to be considered a peak.
+    - num_peaks: The number of peaks to be returned.
+    - from_date: The start date of the analysis.
+    - to_date: The end date of the analysis.
+    - x_padding: The number of days to be added to the start and end date to
+      ensure that the peaks are not cut off.
+    - capacity_input: The input data for the analysis.
+    """
+
+    def __init__(
+        self,
+        min_peak_value: float,
+        num_peaks: int,
+        from_date: datetime.datetime,
+        to_date: datetime.datetime,
+        x_padding: int = 2,
+        capacity_input: "CapacityInput | None" = None,
+    ):
+        """Store the analysis settings and extract the series to analyse.
+
+        Parameters:
+            min_peak_value: Minimum value for a point to count as a peak.
+            num_peaks: Number of peaks to return.
+            from_date: Start of the analysis period.
+            to_date: End of the analysis period.
+            x_padding: Padding around the analysis period. Defaults to 2.
+            capacity_input: The input data. It follows a defaulted parameter
+                and therefore needs a default itself, but it is required.
+
+        Raises:
+            TypeError: If ``capacity_input`` is not provided.
+        """
+        if capacity_input is None:
+            raise TypeError("PowerPeakAnalysis requires a 'capacity_input' argument")
+
+        # Read the series from the supplied input object (not the builtin
+        # ``input`` function, which has no ``get_series``).
+        self.data = capacity_input.get_series()
+        self.timezone = capacity_input.timezone
+        self.min_peak_value = min_peak_value
+        self.num_peaks = num_peaks
+        self.from_date = from_date
+        self.to_date = to_date
+        self.x_padding = x_padding
diff --git a/openenergyid/capacity/__init__.py b/openenergyid/capacity/__init__.py
@@ -0,0 +1,6 @@
+"""Power Offtake peak analysis module."""
+
+from .models import CapacityInput
+from .main import CapacityAnalysis
+
+# Names exported by ``from openenergyid.capacity import *``.
+__all__ = ["CapacityInput", "CapacityAnalysis"]
diff --git a/openenergyid/capacity/main.py b/openenergyid/capacity/main.py
@@ -0,0 +1,105 @@
+from datetime import datetime, timedelta
+from typing import Dict, List, Tuple
+import pandas as pd
+from openenergyid.capacity.models import CapacityInput
+
+
+class CapacityAnalysis:
+    """
+    A class for performing capacity analysis on a given dataset.
+
+    Attributes:
+        data (CapacityInput): The input data for capacity analysis.
+        threshold (float): The value above which a peak is considered significant.
+        window (str): The window size for grouping data before finding peaks.
+            Defaults to "MS" (month start).
+        x_padding (int): The number of 15-minute steps of context to include
+            around each peak in find_peaks_with_surroundings().
+
+    Methods:
+        find_peaks(): Identifies peaks in the data based on the specified threshold and window.
+        find_peaks_with_surroundings(num_peaks=10): Finds peaks along with their surrounding data points.
+        run_analysis(): Runs find_peaks() and returns the result as a serializable dict.
+    """
+
+    def __init__(
+        self,
+        data: CapacityInput,
+        threshold: float = 2.5,
+        window: str = "MS",  # Default to month start
+        x_padding: int = 4,
+    ):
+        """
+        Constructs all the necessary attributes for the CapacityAnalysis object.
+
+        Parameters:
+            data (CapacityInput): The input data for capacity analysis.
+            threshold (float): The value above which a peak is considered significant. Defaults to 2.5.
+            window (str): The window size for grouping data before finding peaks.
+                Defaults to "MS" (month start).
+            x_padding (int): The number of 15-minute steps of context around each peak. Defaults to 4.
+        """
+
+        self.data = data
+        self.threshold = threshold
+        self.window = window
+        self.x_padding = x_padding
+
+    def find_peaks(self) -> List[Tuple[datetime, float]]:
+        """
+        Identifies peaks in the data based on the specified threshold and window.
+
+        The maximum of every window is determined first; only maxima strictly
+        above the threshold are kept.
+
+        Returns:
+            List[Tuple[datetime, float]]: A list of tuples where each tuple contains
+            the timestamp (datetime) of the peak and its value (float).
+        """
+        series = self.data.get_series()
+        # Group by the specified window (default is month start)
+        grouped = series.groupby(pd.Grouper(freq=self.window))
+        # Timestamp of the maximum in each window. A window without data has no
+        # maximum; drop any missing label so the lookup below cannot fail on it.
+        peak_indices = grouped.idxmax().dropna()
+        window_maxima = series.loc[peak_indices]
+        # Filter on the maxima themselves rather than on a mask built from the
+        # full series, so no implicit index alignment is involved.
+        peaks = window_maxima[window_maxima > self.threshold]
+        return list(peaks.items())
+
+    def find_peaks_with_surroundings(self, num_peaks: int = 10) -> List[Dict]:
+        """
+        Finds peaks along with their surrounding data points.
+
+        A peak is a point strictly greater than both of its direct neighbours.
+        The first and last points have only one neighbour and are never peaks.
+
+        Parameters:
+            num_peaks (int): The number of peaks to find. Defaults to 10.
+
+        Returns:
+            List[Dict]: A list of dictionaries, each representing a peak and its
+            surroundings, ordered from the highest peak downwards. Each dictionary
+            has the keys "peak_time", "peak_value" and "surrounding_data".
+        """
+        series = self.data.get_series()
+
+        # shift() leaves the first/last point without a neighbour (NaN), and any
+        # comparison with NaN is False, so the edges are excluded instead of
+        # wrapping around to the other end of the series.
+        is_peak = (series > series.shift(1)) & (series > series.shift(-1))
+        # nlargest keeps the earliest occurrence first when values tie.
+        top_peaks = series[is_peak].nlargest(num_peaks)
+
+        step = timedelta(minutes=15)
+        result = []
+        for peak_time, peak_value in top_peaks.items():
+            start_time = peak_time - self.x_padding * step
+            # One extra step after the peak, as in the original window definition.
+            end_time = peak_time + (self.x_padding + 1) * step
+            # Label-based slice: both bounds are inclusive.
+            surrounding_data = series.loc[start_time:end_time]
+
+            result.append(
+                {
+                    "peak_time": peak_time.isoformat(),
+                    "peak_value": float(peak_value),
+                    "surrounding_data": [
+                        {"timestamp": ts.isoformat(), "value": float(val)}
+                        for ts, val in surrounding_data.items()
+                    ],
+                }
+            )
+
+        return result
+
+    def run_analysis(self) -> Dict[str, List[Dict]]:
+        """
+        Runs find_peaks() and converts the result to plain Python types.
+
+        Returns:
+            Dict[str, List[Dict]]: A dictionary with the single key "peak_moments",
+            holding one {"peak_time": ISO-8601 string, "peak_value": float} entry
+            per peak.
+        """
+        return {
+            "peak_moments": [
+                {"peak_time": peak_time.isoformat(), "peak_value": float(peak_value)}
+                for peak_time, peak_value in self.find_peaks()
+            ]
+        }
diff --git a/openenergyid/capacity/models.py b/openenergyid/capacity/models.py
@@ -0,0 +1,23 @@
+import pandas as pd
+from pydantic import BaseModel, Field
+from openenergyid.models import TimeSeries
+
+
+class CapacityInput(BaseModel):
+    """Input payload for the capacity analysis: a time series and its timezone."""
+
+    # Exposed under the alias "timeZone".
+    timezone: str = Field(alias="timeZone")
+    series: TimeSeries
+    # fromDate: datetime.datetime
+    # toDate: datetime.datetime
+
+    class Config:
+        # pydantic setting: lets the field be populated by its Python name
+        # ("timezone") as well as by its alias.
+        populate_by_name = True
+
+    @classmethod
+    def from_pandas(cls, series: pd.Series, timezone: str = "UTC"):
+        """Build a CapacityInput from a pandas Series.
+
+        The series is converted with ``TimeSeries.from_pandas``; ``timezone``
+        is stored alongside it and defaults to "UTC".
+        """
+        time_series = TimeSeries.from_pandas(series)
+        return cls(timeZone=timezone, series=time_series)
+
+    def get_series(self) -> pd.Series:
+        """Return the stored series as a pandas Series, using the model's timezone."""
+        tz_name = self.timezone
+        return self.series.to_pandas(timezone=tz_name)