Skip to content

Commit

Permalink
refactor(preprocessing): remove unnecessary module preprocessing by moving its utility functionality to where needed
Browse files Browse the repository at this point in the history
  • Loading branch information
ruancomelli committed Jun 15, 2022
1 parent ee380da commit 4a19b3e
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 70 deletions.
1 change: 0 additions & 1 deletion boiling_learning/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
from boiling_learning.preprocessing.experiment_video import ExperimentVideo
from boiling_learning.preprocessing.experimental_data import *
from boiling_learning.preprocessing.image_datasets import *
from boiling_learning.preprocessing.preprocessing import *
17 changes: 0 additions & 17 deletions boiling_learning/preprocessing/case_data.py

This file was deleted.

50 changes: 48 additions & 2 deletions boiling_learning/preprocessing/experiment_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from boiling_learning.datasets.sliceable import SliceableDataset
from boiling_learning.io import json
from boiling_learning.preprocessing.preprocessing import sync_dataframes
from boiling_learning.preprocessing.video import PimsVideo, VideoFrame, convert_video
from boiling_learning.utils.dataclasses import dataclass, field
from boiling_learning.utils.descriptions import describe
Expand Down Expand Up @@ -275,7 +274,7 @@ def make_dataframe(
def sync_time_series(self, source_df: pd.DataFrame, inplace: bool = True) -> pd.DataFrame:
df = self.make_dataframe(recalculate=False, enforce_time=True, inplace=inplace)

df = sync_dataframes(
df = _sync_dataframes(
source_df=source_df,
dest_df=df,
dest_time_column=self.column_names.elapsed_time,
Expand Down Expand Up @@ -355,3 +354,50 @@ def _encode_video(obj: ExperimentVideo) -> json.JSONDataType:
@describe.instance(ExperimentVideo)
def _describe_video(obj: ExperimentVideo) -> Path:
    # NOTE(review): `describe.instance` appears to register this function as
    # the `describe` implementation for `ExperimentVideo` (singledispatch-style)
    # — confirm against boiling_learning.utils.descriptions.
    # An ExperimentVideo is described solely by its source file path.
    return obj.path


def _sync_dataframes(
source_df: pd.DataFrame,
dest_df: pd.DataFrame,
source_time_column: Optional[str] = None,
dest_time_column: Optional[str] = None,
) -> pd.DataFrame:
allowed_index = (pd.DatetimeIndex, pd.TimedeltaIndex, pd.Float64Index)

if source_time_column is not None:
source_df = source_df.set_index(source_time_column, drop=False)
if not isinstance(source_df.index, allowed_index):
raise ValueError(
f'the source DataFrame index must be one of {allowed_index}.'
' Ensure this or pass a valid column name as input.'
f' Got {type(source_df.index)}'
)

if dest_time_column is not None:
dest_df = dest_df.set_index(dest_time_column, drop=False)
if not isinstance(dest_df.index, allowed_index):
raise ValueError(
f'the dest DataFrame index must be one of {allowed_index}.'
' Ensure this or pass a valid column name as input.'
f' Got {type(dest_df.index)}'
)

if isinstance(source_df.index, pd.TimedeltaIndex):
source_df.index = source_df.index.total_seconds()

if isinstance(dest_df.index, pd.TimedeltaIndex):
dest_df.index = dest_df.index.total_seconds()

if type(source_df.index) is not type(dest_df.index):
raise ValueError(
f'the source and dest DataFrames indices must have the same type.'
f' Got {type(source_df.index)} and {type(dest_df.index)}'
)

concat = pd.concat([source_df, dest_df]).sort_index()
if isinstance(source_df.index, pd.Float64Index):
concat = concat.interpolate(method='index', limit_direction='both')
else:
concat = concat.interpolate(method='time', limit_direction='both')
concat = concat.loc[dest_df.index]
return concat
50 changes: 0 additions & 50 deletions boiling_learning/preprocessing/preprocessing.py

This file was deleted.

0 comments on commit 4a19b3e

Please sign in to comment.