From 9fcf1336e3446deb815695ea1b655c787b5348d7 Mon Sep 17 00:00:00 2001 From: Casper Guo Date: Fri, 2 Aug 2024 12:08:47 +0800 Subject: [PATCH] Rewrite data processing to retain `Time` column This is needed for the gap calculation --- app.py | 15 +++++++-------- f1_visualization/preprocess.py | 5 ++++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/app.py b/app.py index 0a2652d..a66450f 100644 --- a/app.py +++ b/app.py @@ -33,14 +33,13 @@ def df_convert_timedelta(df: pd.DataFrame) -> pd.DataFrame: The pd.Timedelta type is not JSON serializable. Columns with this data type need to be dropped or converted. """ - # The Time column is dropped directly since its information is retained by LapTime - df = df.drop(columns=["Time"]) - # PitOUtTime and PitInTime contains information that we might need later - df[["PitInTime", "PitOutTime"]] = df[["PitInTime", "PitOutTime"]].fillna( - pd.Timedelta(0, unit="ms") - ) - df["PitInTime"] = df["PitInTime"].dt.total_seconds() - df["PitOutTime"] = df["PitOutTime"].dt.total_seconds() + timedelta_columns = ["Time", "PitInTime", "PitOutTime"] + # usually the Time column has no NaT values + # it is included here for consistency + df[timedelta_columns] = df[timedelta_columns].fillna(pd.Timedelta(0, unit="ms")) + + for column in timedelta_columns: + df[column] = df[column].dt.total_seconds() return df diff --git a/f1_visualization/preprocess.py b/f1_visualization/preprocess.py index 3b1676f..dd1ae01 100644 --- a/f1_visualization/preprocess.py +++ b/f1_visualization/preprocess.py @@ -561,7 +561,10 @@ def add_lap_rep_deltas(df_laps: pd.DataFrame) -> pd.DataFrame: (df_laps["LapTime"] - df_laps["LapTime_Rep"]) / df_laps["LapTime_Rep"] * 100 ).round(decimals=3) - return df_laps.drop(columns=["LapTime_Rep"]) + # all data engineering functions fully modify the dataframe in addition to returning it + # this is so this function can be called similarly to others in transform + df_laps = df_laps.drop(columns=["LapTime_Rep"]) + return 
df_laps # noqa: RET504 def find_diff(season: int, dfs: dict[str, pd.DataFrame], session_type: str) -> pd.DataFrame: