# Origin: commit 42033b8f38944a8cd84dad5cd00bd34458218272 by Priyanka_43,
# Tue, 5 Nov 2024 22:48:52 -0500.
# Subject: [PATCH] Create preparation.py
# Added function for padding/unpadding of the data.
"""
This module pads and unpads time series data.
"""
import pandas as pd
import numpy as np


def _padding_step(index, freq=None):
    """Return the offset separating consecutive padding timestamps.

    Preference order: an explicit ``freq``, the frequency already attached to
    ``index``, a frequency inferred from the index values, and finally one
    calendar day.
    """
    if freq is None:
        freq = getattr(index, "freq", None)
    if freq is None:
        try:
            freq = pd.infer_freq(index)
        except (TypeError, ValueError):
            # infer_freq needs at least three datetime-like points.
            freq = None
    if freq is None:
        freq = "D"
    return pd.tseries.frequencies.to_offset(freq)


def pad_time_series(series, target_length, padding_value=None, freq=None):
    """
    Pads the given time series to the target length.

    The padding is appended after the last observation and continues the
    spacing of the existing index, so an hourly series is padded hourly and a
    weekly series weekly. Daily spacing is used when no spacing can be
    determined.

    Parameters:
    - series: pd.Series
        The input time series with datetime index.
    - target_length: int
        The desired length after padding.
    - padding_value: float or None
        The value to pad with. If None, will use the mean of the series.
    - freq: str, pd.Timedelta, pandas offset or None
        Spacing of the padding timestamps. If None, the frequency of the
        index is used (attached or inferred), falling back to daily.

    Returns:
    - pd.Series
        The padded time series. The input object itself is returned when it
        is already at least ``target_length`` long.

    Raises:
    - IndexError
        If padding is required but the series is empty, because there is no
        last timestamp to extend from.
    """
    current_length = len(series)

    # Nothing to do when the series is already long enough.
    if current_length >= target_length:
        return series

    if current_length == 0:
        raise IndexError(
            "cannot pad an empty series: there is no last timestamp to extend from"
        )

    padding_length = target_length - current_length

    # Default to the mean so the padding does not shift the series average.
    if padding_value is None:
        padding_value = series.mean()

    step = _padding_step(series.index, freq)
    padding_index = pd.date_range(
        start=series.index[-1] + step,
        periods=padding_length,
        freq=step,
    )
    # Reuse the name so that concatenation keeps it on the result.
    padding_series = pd.Series(padding_value, index=padding_index, name=series.name)

    return pd.concat([series, padding_series])


def unpad_time_series(padded_series, original_length):
    """
    Unpads the given time series to the original length.

    Parameters:
    - padded_series: pd.Series
        The padded time series with datetime index.
    - original_length: int
        The original length of the series before padding. Values larger than
        the series length return the whole series.

    Returns:
    - pd.Series
        The first ``original_length`` observations of ``padded_series``.

    Raises:
    - ValueError
        If ``original_length`` is negative.
    """
    # A negative stop would silently trim from the end instead of the start.
    if original_length < 0:
        raise ValueError("original_length must be non-negative")

    return padded_series.iloc[:original_length]