Skip to content

Commit

Permalink
Rewrite lap rep calculation with groupby and merge
Browse files Browse the repository at this point in the history
Reconciled against 2022 and 2024 race data. Summary statistics are identical.
  • Loading branch information
Casper-Guo committed Aug 3, 2024
1 parent 359422e commit 16fa621
Showing 1 changed file with 9 additions and 28 deletions.
37 changes: 9 additions & 28 deletions f1_visualization/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,24 +538,6 @@ def pct_from_fastest(row):
return df_laps


def find_lap_reps(df_laps: pd.DataFrame) -> dict[tuple[int, int], float]:
    """
    Find the median lap time for every lap of every round.

    Requires:
        df_laps has the following columns: [`RoundNumber`,
                                            `LapNumber`,
                                            `LapTime`]

    Returns:
        A flat dictionary keyed by `(RoundNumber, LapNumber)` tuples whose
        values are the median `LapTime` of that group, rounded to three
        decimal places.

    Note:
        No filtering is applied here; every row passed in contributes to
        the median of its group.
    """
    # Grouping on two keys yields a Series with a two-level index, so
    # to_dict() produces tuple keys rather than nested dictionaries.
    return (
        df_laps.groupby(["RoundNumber", "LapNumber"])["LapTime"]
        .median()
        .round(decimals=3)
        .to_dict()
    )


def add_lap_rep_deltas(df_laps: pd.DataFrame) -> pd.DataFrame:
"""
Add two columns that calculate the difference to the lap representative time.
Expand All @@ -568,19 +550,18 @@ def add_lap_rep_deltas(df_laps: pd.DataFrame) -> pd.DataFrame:
Requires:
df_laps has the following columns: [`RoundNumber`, `LapTime`]
"""
lap_reps = find_lap_reps(df_laps)

def delta_to_lap_rep(row):
return row.loc["LapTime"] - lap_reps[(row.loc["RoundNumber"], row.loc["LapNumber"])]
lap_reps = (
df_laps.groupby(["RoundNumber", "LapNumber"])["LapTime"].median().round(decimals=3)
)

def pct_from_lap_rep(row):
delta = row.loc["LapTime"] - lap_reps[(row.loc["RoundNumber"], row.loc["LapNumber"])]
return round(delta / lap_reps[(row.loc["RoundNumber"], row.loc["LapNumber"])] * 100, 3)
df_laps = df_laps.merge(lap_reps, on=["RoundNumber", "LapNumber"], suffixes=(None, "_Rep"))

df_laps["DeltaToLapRep"] = df_laps.apply(delta_to_lap_rep, axis=1)
df_laps["PctFromLapRep"] = df_laps.apply(pct_from_lap_rep, axis=1)
df_laps["DeltaToLapRep"] = df_laps["LapTime"] - df_laps["LapTime_Rep"]
df_laps["PctFromLapRep"] = (
(df_laps["LapTime"] - df_laps["LapTime_Rep"]) / df_laps["LapTime_Rep"] * 100
).round(decimals=3)

return df_laps
return df_laps.drop(columns=["LapTime_Rep"])


def find_diff(season: int, dfs: dict[str, pd.DataFrame], session_type: str) -> pd.DataFrame:
Expand Down

0 comments on commit 16fa621

Please sign in to comment.