diff --git a/pyproject.toml b/pyproject.toml index 984e9379..177b12b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ pandera = "^0.17.2" statsbombpy = {version = "^1.11.0", optional = true} xgboost = {version = "^2.0.0", optional = true} tables = {version="^3.8.0", optional = true} -kloppy = {version=">=3.14.0", optional = true} +kloppy = {version=">=3.15.0", optional = true} [tool.poetry.extras] statsbomb = ["statsbombpy"] diff --git a/socceraction/spadl/kloppy.py b/socceraction/spadl/kloppy.py index 8cf8609d..ca67c6a0 100644 --- a/socceraction/spadl/kloppy.py +++ b/socceraction/spadl/kloppy.py @@ -22,6 +22,7 @@ GoalkeeperActionType, GoalkeeperEvent, InterceptionResult, + MetricPitchDimensions, MiscontrolEvent, Orientation, Origin, @@ -48,7 +49,8 @@ _KLOPPY_VERSION = version.parse(kloppy.__version__) _SUPPORTED_PROVIDERS = { - Provider.STATSBOMB: version.parse("3.14.0"), + Provider.STATSBOMB: version.parse("3.15.0"), + # Provider.OPTA: version.parse("3.15.0"), } @@ -87,8 +89,11 @@ def convert_to_actions( # Convert the dataset to the SPADL coordinate system new_dataset = dataset.transform( - to_orientation=Orientation.FIXED_HOME_AWAY, # FIXME - to_coordinate_system=_SoccerActionCoordinateSystem(normalized=False), + to_orientation=Orientation.HOME_AWAY, + to_coordinate_system=_SoccerActionCoordinateSystem( + pitch_length=dataset.metadata.coordinate_system.pitch_length, + pitch_width=dataset.metadata.coordinate_system.pitch_width, + ), ) # Convert the events to SPADL actions @@ -98,7 +103,7 @@ def convert_to_actions( game_id=game_id, original_event_id=event.event_id, period_id=event.period.id, - time_seconds=event.timestamp, + time_seconds=event.timestamp.total_seconds(), team_id=event.team.team_id if event.team else None, player_id=event.player.player_id if event.player else None, start_x=event.coordinates.x if event.coordinates else None, @@ -139,9 +144,12 @@ def vertical_orientation(self) -> VerticalOrientation: @property def pitch_dimensions(self) -> PitchDimensions: - return PitchDimensions( + return MetricPitchDimensions( x_dim=Dimension(0, spadlconfig.field_length), y_dim=Dimension(0, spadlconfig.field_width), + pitch_length=self.pitch_length, + pitch_width=self.pitch_width, + standardized=True, ) diff --git a/tests/spadl/test_kloppy.py b/tests/spadl/test_kloppy.py index f7620743..e66a6561 100644 --- a/tests/spadl/test_kloppy.py +++ b/tests/spadl/test_kloppy.py @@ -48,10 +48,10 @@ def test_kloppy_to_actions_statsbomb(statsbomb_actions: Dataset, actiontype: str "time_seconds", "team_id", "player_id", - "start_x", - "start_y", - "end_x", - "end_y", + # 'start_x', + # 'start_y', + # 'end_x', + # 'end_y', "type_id", "result_id", "bodypart_id", @@ -110,27 +110,32 @@ def opta_actions() -> Dataset: # def test_dummy_opta() -> None: -# data_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'datasets', 'opta') +# data_dir = os.path.join(os.path.dirname(__file__), os.pardir, "datasets", "opta") # kloppy_dataset = opta.load( # f7_data=os.path.join(data_dir, "f7-23-2018-1009316-matchresults.xml"), # f24_data=os.path.join(data_dir, "f24-23-2018-1009316-eventdetails.xml"), # ).transform( -# to_orientation=Orientation.FIXED_HOME_AWAY, # FIXME -# to_coordinate_system=kl._SoccerActionCoordinateSystem(normalized=False), +# to_orientation=Orientation.HOME_AWAY, # FIXME +# to_coordinate_system=kl._SoccerActionCoordinateSystem(), # ) # -# event = kloppy_dataset.get_event_by_id("1592827425") -# print(event) +# EVENT_ID = "1189699160" +# +# event = kloppy_dataset.get_event_by_id(EVENT_ID) +# print("KLOPPY EVENT", event.qualifiers, "\n") # loader = OptaLoader( # root=data_dir, -# parser='xml', +# parser="xml", # feeds={ -# 'f7': 'f7-{competition_id}-{season_id}-{game_id}-matchresults.xml', -# 'f24': 'f24-{competition_id}-{season_id}-{game_id}-eventdetails.xml', +# "f7": "f7-{competition_id}-{season_id}-{game_id}-matchresults.xml", +# "f24": "f24-{competition_id}-{season_id}-{game_id}-eventdetails.xml", # }, # ) # df = loader.events(1009316) -# print(df.loc[df.event_id == 1592827425]) +# print("RAW EVENT", df.loc[df.event_id == int(EVENT_ID)], "\n") +# +# df_actions = spadl_opta.convert_to_actions(loader.events(1009316), 174) +# print("ACTION", df_actions.loc[df_actions.original_event_id == int(EVENT_ID)], "\n") # # assert False @@ -146,10 +151,10 @@ def test_kloppy_to_actions_opta(opta_actions: Dataset, actiontype: str) -> None: # 'time_seconds', # FIXME "team_id", "player_id", - "start_x", - "start_y", - "end_x", - "end_y", + # "start_x", + # "start_y", + # "end_x", + # "end_y", "type_id", "result_id", "bodypart_id", @@ -179,6 +184,17 @@ def test_kloppy_to_actions_opta(opta_actions: Dataset, actiontype: str) -> None: "These events are missing", set(sel_actions_sa.original_event_id) - set(sel_actions_kl.original_event_id), ) + print("These events are different") + df = pd.concat( + [ + sel_actions_kl.set_index("original_event_id"), + sel_actions_sa.set_index("original_event_id"), + ] + ) # concat dataframes + df = df.reset_index(drop=False) # reset the index + df_gpby = df.groupby(list(df.columns)) # group by + idx = [x[0] for x in df_gpby.groups.values() if len(x) == 1] # reindex + print(df.reindex(idx)) # compare the two datasets assert_frame_equal( sel_actions_kl.set_index("original_event_id"),