Skip to content

Commit

Permalink
added capacity analysis with demo nb
Browse files Browse the repository at this point in the history
  • Loading branch information
Molier committed Jul 19, 2024
1 parent db75f3a commit f9fbfac
Show file tree
Hide file tree
Showing 10 changed files with 28,111 additions and 0 deletions.
1 change: 1 addition & 0 deletions data/capacity/elec_sample.json

Large diffs are not rendered by default.

6,344 changes: 6,344 additions & 0 deletions data/capacity/elec_sample2.json

Large diffs are not rendered by default.

20,925 changes: 20,925 additions & 0 deletions data/capacity/electricity_delivered_sample.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/capacity/electricity_delivered_sample2.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

671 changes: 671 additions & 0 deletions demo_capacity_analysis.ipynb

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions openenergyid/capacity/PowerAnalysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import datetime

from openenergyid.capacity.models import CapacityInput


class PowerPeakAnalysis:
    """Hold the input series and parameters for a power-peak analysis.

    Attributes:
        data: The series returned by ``capacity_input.get_series()``.
        timezone: The ``timezone`` attribute of ``capacity_input``.
        min_peak_value: The minimum value of a peak to be considered a peak.
        num_peaks: The number of peaks to be returned.
        from_date: The start date of the analysis.
        to_date: The end date of the analysis.
        x_padding: The number of days to be added to the start and end date
            to ensure that the peaks are not cut off.
    """

    def __init__(
        self,
        min_peak_value: float,
        num_peaks: int,
        from_date: datetime.datetime,
        to_date: datetime.datetime,
        x_padding: int = 2,
        capacity_input: "CapacityInput | None" = None,
    ):
        """Store the analysis parameters and load the series from the input.

        Parameters:
            min_peak_value: The minimum value of a peak to be considered a peak.
            num_peaks: The number of peaks to be returned.
            from_date: The start date of the analysis.
            to_date: The end date of the analysis.
            x_padding: Number of days of padding around the date range. Defaults to 2.
            capacity_input: Object providing ``get_series()`` and ``timezone``.
                It keeps its trailing position for backward compatibility,
                but it is required.

        Raises:
            TypeError: If ``capacity_input`` is not supplied.
        """
        if capacity_input is None:
            raise TypeError("PowerPeakAnalysis requires a 'capacity_input' argument")

        # Read from the supplied input object (not the ``input`` builtin).
        self.data = capacity_input.get_series()
        self.timezone = capacity_input.timezone
        self.min_peak_value = min_peak_value
        self.num_peaks = num_peaks
        self.from_date = from_date
        self.to_date = to_date
        self.x_padding = x_padding
6 changes: 6 additions & 0 deletions openenergyid/capacity/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Power Offtake peak analysis module."""

from .models import CapacityInput
from .main import CapacityAnalysis

# Names re-exported as the public API of this package.
__all__ = ["CapacityInput", "CapacityAnalysis"]
105 changes: 105 additions & 0 deletions openenergyid/capacity/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
import pandas as pd
from openenergyid.capacity.models import CapacityInput


class CapacityAnalysis:
    """
    A class for performing capacity analysis on a given dataset.

    Attributes:
        data (CapacityInput): The input data for capacity analysis.
        threshold (float): The value above which a peak is considered significant.
        window (str): The window size for grouping data before finding peaks.
            Defaults to "MS" (month start).
        x_padding (int): The padding to apply on the x-axis for visualization purposes.

    Methods:
        find_peaks(): Identifies the maximum of each window that exceeds the threshold.
        find_peaks_with_surroundings(num_peaks=10): Finds the largest local maxima
            along with their surrounding data points.
        run_analysis(): Returns the result of ``find_peaks`` as a serialisable dict.
    """

    def __init__(
        self,
        data: "CapacityInput",
        threshold: float = 2.5,
        window: str = "MS",  # Default to month start
        x_padding: int = 4,
    ):
        """
        Constructs all the necessary attributes for the CapacityAnalysis object.

        Parameters:
            data (CapacityInput): The input data for capacity analysis.
            threshold (float): The value above which a peak is considered significant.
                Defaults to 2.5.
            window (str): The window size for grouping data before finding peaks.
                Defaults to "MS" (month start).
            x_padding (int): The padding to apply on the x-axis for visualization
                purposes. Defaults to 4.
        """
        self.data = data
        self.threshold = threshold
        self.window = window
        self.x_padding = x_padding

    def find_peaks(self) -> List[Tuple[datetime, float]]:
        """
        Identifies peaks in the data based on the specified threshold and window.

        The series is grouped by ``self.window``; the maximum of each group is a
        candidate and only candidates strictly above ``self.threshold`` are kept.

        Returns:
            List[Tuple[datetime, float]]: A list of tuples where each tuple contains
            the timestamp of the peak and its value.
        """
        series = self.data.get_series()
        # Group by the specified window (default is month start).
        grouped = series.groupby(pd.Grouper(freq=self.window))
        # Timestamp of the maximum in each window. Windows without samples may
        # yield NaT, which cannot be looked up with ``.loc``, so drop those.
        peak_indices = grouped.idxmax().dropna()
        window_maxima = series.loc[peak_indices]
        # Filter on the maxima themselves rather than with a mask built on the
        # full series, which relied on implicit index re-alignment.
        peaks = window_maxima[window_maxima > self.threshold]
        return list(peaks.items())

    def find_peaks_with_surroundings(self, num_peaks: int = 10) -> List[Dict]:
        """
        Finds peaks along with their surrounding data points.

        A peak is a sample strictly greater than both of its neighbours, so the
        first and last samples are never peaks. The ``num_peaks`` largest peaks
        are returned in descending order of value. For each peak the surrounding
        window runs from ``x_padding`` 15-minute steps before the peak up to and
        including ``x_padding + 1`` steps after it.

        Parameters:
            num_peaks (int): The number of peaks to find. Defaults to 10.

        Returns:
            List[Dict]: A list of dictionaries with keys ``peak_time`` (ISO string),
            ``peak_value`` (float) and ``surrounding_data`` (a list of
            ``{"timestamp", "value"}`` dictionaries).
        """
        series = self.data.get_series()

        # Compare each sample with its neighbours. The shifted series hold NaN at
        # the edges, which compares False, so the first/last samples are excluded
        # (the previous index loop wrapped around and compared the first sample
        # with the last one).
        is_local_max = (series > series.shift(1)) & (series > series.shift(-1))
        top_peaks = series[is_local_max].nlargest(num_peaks)

        before = timedelta(minutes=15 * self.x_padding)
        after = timedelta(minutes=15 * (self.x_padding + 1))

        result = []
        for peak_time, peak_value in top_peaks.items():
            # Label-based slice: both end points are inclusive.
            surrounding_data = series[peak_time - before : peak_time + after]
            result.append(
                {
                    "peak_time": peak_time.isoformat(),
                    "peak_value": float(peak_value),
                    "surrounding_data": [
                        {"timestamp": ts.isoformat(), "value": float(val)}
                        for ts, val in surrounding_data.items()
                    ],
                }
            )

        return result

    def run_analysis(self):
        """
        Runs ``find_peaks`` and formats the outcome for serialisation.

        Returns:
            dict: ``{"peak_moments": [...]}`` where each entry holds ``peak_time``
            (ISO string) and ``peak_value`` (float).
        """
        return {
            "peak_moments": [
                {"peak_time": peak_time.isoformat(), "peak_value": float(peak_value)}
                for peak_time, peak_value in self.find_peaks()
            ]
        }
23 changes: 23 additions & 0 deletions openenergyid/capacity/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pandas as pd
from pydantic import BaseModel, Field
from openenergyid.models import TimeSeries


class CapacityInput(BaseModel):
    """Model for capacity input: a time series together with its time zone."""

    # pydantic v2 configuration (replaces the deprecated inner ``class Config``):
    # allow ``timezone`` to be populated by field name as well as by its alias.
    model_config = {"populate_by_name": True}

    timezone: str = Field(alias="timeZone")
    series: TimeSeries
    # TODO: fromDate / toDate (datetime) fields are not part of the model yet.

    @classmethod
    def from_pandas(cls, series: pd.Series, timezone: str = "UTC") -> "CapacityInput":
        """Build a CapacityInput from a pandas series.

        Parameters:
            series (pd.Series): The series, converted with ``TimeSeries.from_pandas``.
            timezone (str): The time zone stored on the model. Defaults to "UTC".

        Returns:
            CapacityInput: The constructed model.
        """
        return cls(timeZone=timezone, series=TimeSeries.from_pandas(series))

    def get_series(self) -> pd.Series:
        """Return the pandas series ready for analysis.

        The series is obtained from ``self.series.to_pandas`` with the model's
        ``timezone`` passed as the ``timezone`` argument.
        """
        return self.series.to_pandas(timezone=self.timezone)

0 comments on commit f9fbfac

Please sign in to comment.