Merge pull request #12 from EnergieID/develop
Refactor capacity input model
JrtPec authored Aug 14, 2024
2 parents 0f94f75 + 75cacb9 commit bc7ea9e
Showing 8 changed files with 100 additions and 104 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -16,3 +16,9 @@ repos:
    hooks:
      - id: pyupgrade
        args: [--py311-plus]

  # - repo: https://github.com/pre-commit/mirrors-mypy
  #   rev: ""  # Use the sha / tag you want to point at
  #   hooks:
  #     - id: mypy
  #       args: [--strict, --ignore-missing-imports]
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "basic"
}
124 changes: 60 additions & 64 deletions demo_capacity_analysis.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion openenergyid/__init__.py
@@ -1,6 +1,6 @@
"""Open Energy ID Python SDK."""

__version__ = "0.1.15"
__version__ = "0.1.16"

from .enums import Granularity
from .models import TimeDataFrame, TimeSeries
34 changes: 0 additions & 34 deletions openenergyid/capacity/PowerAnalysis.py

This file was deleted.

20 changes: 16 additions & 4 deletions openenergyid/capacity/main.py
@@ -1,7 +1,9 @@
"""Main module for capacity analysis."""

import datetime as dt
import typing
import pandas as pd
import pandera.typing as pdt


class CapacityAnalysis:
@@ -21,7 +23,7 @@ class CapacityAnalysis:

    def __init__(
        self,
        data: pd.Series,
        data: pdt.Series,
        threshold: float = 2.5,
        window: str = "MS",  # Default to month start
        x_padding: int = 4,
@@ -50,11 +52,12 @@ def find_peaks(self) -> pd.Series:
"""
# Group by the specified window (default is month start)
grouped = self.data.groupby(pd.Grouper(freq=self.window))

# Find the index (timestamp) of the maximum value in each group
peak_indices = grouped.idxmax()

# Get the corresponding peak values
peaks = self.data.loc[peak_indices][self.data > self.threshold]

return peaks

    def find_peaks_with_surroundings(
@@ -69,12 +72,20 @@ def find_peaks_with_surroundings(
        Returns:
            List[tuple[dt.datetime,float,pd.Series]]: A list of tuples containing peak time, peak value, and surrounding data.
        """
        peaks = self.data.sort_values(ascending=False).head(num_peaks)
        peaks = self.data.nlargest(num_peaks * 2)
        peaks = peaks[peaks > self.threshold]
        if peaks.empty:
            return []

        result = []
        window_size = dt.timedelta(minutes=15 * (2 * self.x_padding + 1))

        for peak_time, peak_value in peaks.items():
            peak_time = typing.cast(pd.Timestamp, peak_time)

            if any(abs(peak_time - prev_peak[0]) < window_size for prev_peak in result):
                continue

            start_time = peak_time - dt.timedelta(minutes=15 * self.x_padding)
            end_time = peak_time + dt.timedelta(minutes=15 * (self.x_padding + 1))
            surrounding_data = self.data[start_time:end_time]
@@ -86,5 +97,6 @@
                    surrounding_data,
                ]
            )

            if len(result) == num_peaks:
                break
        return result
1 change: 1 addition & 0 deletions openenergyid/capacity/models.py
@@ -10,6 +10,7 @@ class CapacityInput(BaseModel):

    timezone: str = Field(alias="timeZone")
    series: TimeSeries
    threshold: float = Field(default=2.5, ge=0)


class PeakDetail(BaseModel):
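
For reference, a hedged sketch of populating the new threshold field on CapacityInput. The field names and the timeZone alias come from the hunk above; building the nested TimeSeries via from_pandas and the example values are assumptions for illustration.

```python
import pandas as pd

from openenergyid.capacity.models import CapacityInput
from openenergyid.models import TimeSeries

# Assumed: TimeSeries.from_pandas accepts a tz-aware pandas Series (see openenergyid/models.py below).
series = TimeSeries.from_pandas(
    pd.Series(
        [1.0, 2.0, 6.0],
        index=pd.date_range("2024-01-01", periods=3, freq="15min", tz="UTC"),
        name="power",
    )
)

payload = CapacityInput(timeZone="Europe/Brussels", series=series, threshold=3.0)
print(payload.threshold)  # 3.0; omitting it falls back to 2.5, and negative values fail the ge=0 check
```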
14 changes: 13 additions & 1 deletion openenergyid/models.py
@@ -63,7 +63,19 @@ def from_json(cls, string: str | None = None, path: str | None = None, **kwargs)


class TimeSeries(TimeSeriesBase):
"""Time series data with a single column."""
"""
Represents a time series data.
Attributes:
name (str | None): The name of the time series.
data (list[float | None]): The data points of the time series.
Methods:
replace_nan_with_none(cls, data: list[float]) -> list[float | None]:
Replace NaN values with None.
from_pandas(cls, data: pd.Series) -> Self:
Create a TimeSeries object from a Pandas Series.
to_pandas(self, timezone: str = "UTC") -> pd.Series:
Convert the TimeSeries object to a Pandas Series.
"""

name: str | None = None
data: list[float | None]
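
To make the documented helpers concrete, here is a small round-trip sketch based on the docstring added above. It assumes a tz-aware 15-minute pandas Series and that NaN values come back as None in data, per replace_nan_with_none; the example data is illustrative only.

```python
import pandas as pd

from openenergyid.models import TimeSeries

raw = pd.Series(
    [1.0, float("nan"), 3.0],
    index=pd.date_range("2024-01-01", periods=3, freq="15min", tz="UTC"),
    name="power",
)

ts = TimeSeries.from_pandas(raw)         # NaN is expected to become None in ts.data
restored = ts.to_pandas(timezone="UTC")  # back to a pandas Series in the requested timezone
print(ts.name, ts.data)
print(restored)
```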
