From d81ae7d44a8ec34e9761b76c4f213887abd036d7 Mon Sep 17 00:00:00 2001 From: SportsDynamicsDS Date: Wed, 17 Jul 2024 09:26:43 +0000 Subject: [PATCH] Enriched metadata with date, game_week, game_id and coach --- kloppy/_providers/statsbomb.py | 2 ++ kloppy/domain/models/common.py | 9 +++++ .../event/datafactory/deserializer.py | 17 +++++++++ .../serializers/event/sportec/deserializer.py | 24 +++++++++++++ .../event/statsbomb/deserializer.py | 3 +- .../event/statsperform/deserializer.py | 6 ++++ .../event/statsperform/parsers/base.py | 13 ++++++- .../event/statsperform/parsers/f24_xml.py | 30 ++++++++++++++-- .../event/statsperform/parsers/ma1_json.py | 33 +++++++++++++++-- .../event/wyscout/deserializer_v2.py | 4 +++ .../event/wyscout/deserializer_v3.py | 28 +++++++++++++-- .../serializers/tracking/secondspectrum.py | 25 +++++++++++-- .../infra/serializers/tracking/skillcorner.py | 29 +++++++++++++-- .../tracking/sportec/deserializer.py | 15 +++++++- .../serializers/tracking/statsperform.py | 9 ++++- .../serializers/tracking/tracab/tracab_dat.py | 9 ++++- .../files/second_spectrum_fake_metadata.json | 4 ++- kloppy/tests/test_datafactory.py | 18 ++++++++++ kloppy/tests/test_helpers.py | 5 ++- kloppy/tests/test_secondspectrum.py | 22 ++++++++++-- kloppy/tests/test_skillcorner.py | 27 ++++++++++++-- kloppy/tests/test_sportec.py | 35 +++++++++++++++++++ kloppy/tests/test_statsbomb.py | 35 ++++++++++++++++++- kloppy/tests/test_statsperform.py | 27 +++++++++++--- kloppy/tests/test_tracab.py | 18 +++++++++- kloppy/tests/test_wyscout.py | 35 ++++++++++++++++++- 26 files changed, 449 insertions(+), 33 deletions(-) diff --git a/kloppy/_providers/statsbomb.py b/kloppy/_providers/statsbomb.py index d26614ae..dc41e597 100644 --- a/kloppy/_providers/statsbomb.py +++ b/kloppy/_providers/statsbomb.py @@ -18,6 +18,7 @@ def load( event_types: Optional[List[str]] = None, coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, + additional_metadata: dict = {} ) -> EventDataset: """ Load StatsBomb event data into a [`EventDataset`][kloppy.domain.models.event.EventDataset] @@ -48,6 +49,7 @@ def load( lineup_data=lineup_data_fp, three_sixty_data=three_sixty_data_fp, ), + additional_metadata=additional_metadata, ) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 8f1a83d1..f67b0f6f 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1003,6 +1003,10 @@ class Metadata: orientation: See [`Orientation`][kloppy.domain.models.common.Orientation] flags: provider: See [`Provider`][kloppy.domain.models.common.Provider] + date: Date of the game. + game_week: Game week (or match day) of the game. It can also be the stage + (ex: "8th Finals"), if the game is happening during a cup or a play-off. + game_id: Game id of the game from the provider. """ teams: List[Team] @@ -1014,6 +1018,11 @@ class Metadata: coordinate_system: CoordinateSystem score: Optional[Score] = None frame_rate: Optional[float] = None + date: Optional[datetime] = None + game_week: Optional[str] = None + game_id: Optional[str] = None + home_coach: Optional[str] = None + away_coach: Optional[str] = None attributes: Optional[Dict] = field(default_factory=dict, compare=False) def __post_init__(self): diff --git a/kloppy/infra/serializers/event/datafactory/deserializer.py b/kloppy/infra/serializers/event/datafactory/deserializer.py index 88a81dd9..e517251a 100644 --- a/kloppy/infra/serializers/event/datafactory/deserializer.py +++ b/kloppy/infra/serializers/event/datafactory/deserializer.py @@ -1,6 +1,7 @@ import json import logging from datetime import timedelta, datetime, timezone +from dateutil.parser import parse, _parser from dataclasses import replace from typing import Dict, List, Tuple, Union, IO, NamedTuple @@ -453,6 +454,19 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: periods[half], end_timestamp=timestamp ) + try: + date = match["date"] + if date: + date = parse(date).astimezone(timezone.utc) + except _parser.ParserError: + date = None + game_week = match.get("week", None) + if game_week: + game_week = str(game_week) + game_id = match.get("matchId", None) + if game_id: + game_id = str(game_id) + # exclude goals, already listed as shots too incidences.pop(DF_EVENT_CLASS_GOALS) raw_events = [ @@ -613,6 +627,9 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: score=score, provider=Provider.DATAFACTORY, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_week=game_week, + game_id=game_id, ) return EventDataset( diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 4119574c..fefd8589 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -83,6 +83,8 @@ class SportecMetadata(NamedTuple): x_max: float y_max: float fps: int + home_coach: str + away_coach: str def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: @@ -99,10 +101,17 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: home_team = away_team = None for team_elm in team_elms: + head_coach = [ + trainer.attrib["Shortname"] + for trainer in team_elm.TrainerStaff.iterchildren("Trainer") + if trainer.attrib["Role"] == "headcoach" + ] if team_elm.attrib["Role"] == "home": home_team = _team_from_xml_elm(team_elm) + home_coach = head_coach[0] if len(head_coach) else None elif team_elm.attrib["Role"] == "guest": away_team = _team_from_xml_elm(team_elm) + away_coach = head_coach[0] if len(head_coach) else None else: raise DeserializationError( f"Unknown side: {team_elm.attrib['Role']}" @@ -194,6 +203,8 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: x_max=x_max, y_max=y_max, fps=SPORTEC_FPS, + home_coach=home_coach, + away_coach=away_coach, ) @@ -404,12 +415,20 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): + date = parse( + match_root.MatchInformation.General.attrib["KickoffTime"] + ).astimezone(timezone.utc) + game_week = match_root.MatchInformation.General.attrib["MatchDay"] + game_id = match_root.MatchInformation.General.attrib["MatchId"] + sportec_metadata = sportec_metadata_from_xml_elm(match_root) teams = home_team, away_team = sportec_metadata.teams transformer = self.get_transformer( pitch_length=sportec_metadata.x_max, pitch_width=sportec_metadata.y_max, ) + home_coach = sportec_metadata.home_coach + away_coach = sportec_metadata.away_coach periods = [] period_id = 0 @@ -632,6 +651,11 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM), provider=Provider.SPORTEC, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_week=game_week, + game_id=game_id, + home_coach=home_coach, + away_coach=away_coach, ) return EventDataset( diff --git a/kloppy/infra/serializers/event/statsbomb/deserializer.py b/kloppy/infra/serializers/event/statsbomb/deserializer.py index 0bfbc130..c933e9e8 100644 --- a/kloppy/infra/serializers/event/statsbomb/deserializer.py +++ b/kloppy/infra/serializers/event/statsbomb/deserializer.py @@ -36,7 +36,7 @@ class StatsBombDeserializer(EventDataDeserializer[StatsBombInputs]): def provider(self) -> Provider: return Provider.STATSBOMB - def deserialize(self, inputs: StatsBombInputs) -> EventDataset: + def deserialize(self, inputs: StatsBombInputs, additional_metadata) -> EventDataset: # Intialize coordinate system transformer self.transformer = self.get_transformer() @@ -118,6 +118,7 @@ def deserialize(self, inputs: StatsBombInputs) -> EventDataset: score=None, provider=Provider.STATSBOMB, coordinate_system=self.transformer.get_to_coordinate_system(), + **additional_metadata ) return EventDataset(metadata=metadata, records=events) diff --git a/kloppy/infra/serializers/event/statsperform/deserializer.py b/kloppy/infra/serializers/event/statsperform/deserializer.py index d25b203c..05f92e3f 100644 --- a/kloppy/infra/serializers/event/statsperform/deserializer.py +++ b/kloppy/infra/serializers/event/statsperform/deserializer.py @@ -614,6 +614,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset: periods = metadata_parser.extract_periods() score = metadata_parser.extract_score() teams = metadata_parser.extract_lineups() + date = events_parser.extract_date() + game_week = events_parser.extract_game_week() + game_id = events_parser.extract_game_id() raw_events = [ event for event in events_parser.extract_events() @@ -827,6 +830,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset: if inputs.event_feed.upper() == "F24" else Provider.STATSPERFORM, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_week=game_week, + game_id=game_id, ) return EventDataset( diff --git a/kloppy/infra/serializers/event/statsperform/parsers/base.py b/kloppy/infra/serializers/event/statsperform/parsers/base.py index 9bb60f43..90a97ffe 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/base.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/base.py @@ -12,7 +12,6 @@ from datetime import datetime from dataclasses import dataclass, field -from typing import List, Optional @dataclass @@ -53,6 +52,18 @@ def extract_score(self) -> Optional[Score]: """Return the score of the game.""" return None + def extract_date(self) -> Optional[str]: + """Return the date of the game.""" + return None + + def extract_game_week(self) -> Optional[str]: + """Return the game_week of the game.""" + return None + + def extract_game_id(self) -> Optional[str]: + """Return the game_id of the game.""" + return None + def extract_lineups(self) -> Tuple[Team, Team]: """Return the home and away team.""" raise NotImplementedError diff --git a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py index 34096577..f32dbd95 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py @@ -1,9 +1,9 @@ """XML parser for Opta F24 feeds.""" import pytz -from datetime import datetime -from typing import List +from datetime import datetime, timezone +from typing import List, Optional +from dateutil.parser import parse -from kloppy.domain import Period from .base import OptaXMLParser, OptaEvent @@ -53,3 +53,27 @@ def extract_events(self) -> List[OptaEvent]: ) for event in game_elm.iterchildren("Event") ] + + def extract_date(self) -> Optional[str]: + """Return the date of the game.""" + game_elm = self.root.find("Game") + if game_elm and "game_date" in game_elm.attrib: + return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc) + else: + return None + + def extract_game_week(self) -> Optional[str]: + """Return the game_week of the game.""" + game_elm = self.root.find("Game") + if game_elm and "matchday" in game_elm.attrib: + return game_elm.attrib["matchday"] + else: + return None + + def extract_game_id(self) -> Optional[str]: + """Return the game_id of the game.""" + game_elm = self.root.find("Game") + if game_elm and "id" in game_elm.attrib: + return game_elm.attrib["id"] + else: + return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py index a3a97ec2..9f4764b7 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py @@ -1,6 +1,6 @@ """JSON parser for Stats Perform MA1 feeds.""" import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Optional, List, Tuple, Dict from kloppy.domain import Period, Score, Team, Ground, Player @@ -30,7 +30,13 @@ def extract_periods(self) -> List[Period]: return parsed_periods def extract_score(self) -> Optional[Score]: - return None + live_data = self.root["liveData"] + match_details = live_data["matchDetails"] + home_score = match_details["scores"]["total"]["home"] + away_score = match_details["scores"]["total"]["away"] + if home_score is None or away_score is None: + return None + return Score(home=home_score, away=away_score) def extract_lineups(self) -> Tuple[Team, Team]: teams = {} @@ -76,6 +82,29 @@ def extract_lineups(self) -> Tuple[Team, Team]: raise DeserializationError("Lineup incomplete") return home_team, away_team + def extract_date(self) -> Optional[str]: + """Return the date of the game.""" + if "matchInfo" in self.root and "date" in self.root["matchInfo"]: + return datetime.strptime( + self.root["matchInfo"]["date"], "%Y-%m-%dZ" + ).astimezone(timezone.utc) + else: + return None + + def extract_game_week(self) -> Optional[str]: + """Return the game_week of the game.""" + if "matchInfo" in self.root and "week" in self.root["matchInfo"]: + return self.root["matchInfo"]["week"] + else: + return None + + def extract_game_id(self) -> Optional[str]: + """Return the game_id of the game.""" + if "matchInfo" in self.root and "id" in self.root["matchInfo"]: + return self.root["matchInfo"]["id"] + else: + return None + def _parse_teams(self) -> List[Dict[str, Any]]: parsed_teams = [] match_info = self.root["matchInfo"] diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v2.py b/kloppy/infra/serializers/event/wyscout/deserializer_v2.py index a4fc8a0f..896b663d 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v2.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v2.py @@ -499,6 +499,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: for wyId, team in teams.items() ] ) + game_id = raw_events["events"][0].get("matchId", None) + if game_id: + game_id = str(game_id) events = [] @@ -730,6 +733,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: flags=None, provider=Provider.WYSCOUT, coordinate_system=transformer.get_to_coordinate_system(), + game_id=game_id, ) return EventDataset(metadata=metadata, records=events) diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 2c754fde..ed3c820c 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -1,8 +1,9 @@ import json import logging from dataclasses import replace -from datetime import timedelta -from typing import Dict, List, Tuple, NamedTuple, IO +from datetime import timedelta, timezone +from dateutil.parser import parse +from typing import Dict, List from kloppy.domain import ( BallOutEvent, @@ -536,6 +537,24 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: for wyId, team in teams.items() ] ) + date = raw_events["match"].get("dateutc") + if date: + date = parse(date).astimezone(timezone.utc) + game_week = raw_events["match"].get("gameweek") + if game_week: + game_week = str(game_week) + game_id = raw_events["events"][0].get("matchId") + if game_id: + game_id = str(game_id) + coaches = raw_events["coaches"] + if home_team_id in coaches and "coach" in coaches[home_team_id]: + home_coach = coaches[home_team_id]["coach"].get("shortName") + else: + home_coach = None + if away_team_id in coaches and "coach" in coaches[away_team_id]: + away_coach = coaches[away_team_id]["coach"].get("shortName") + else: + away_coach = None events = [] @@ -757,6 +776,11 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: flags=None, provider=Provider.WYSCOUT, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_week=game_week, + game_id=game_id, + home_coach=home_coach, + away_coach=away_coach, ) return EventDataset(metadata=metadata, records=events) diff --git a/kloppy/infra/serializers/tracking/secondspectrum.py b/kloppy/infra/serializers/tracking/secondspectrum.py index 0412c15f..ca4441e8 100644 --- a/kloppy/infra/serializers/tracking/secondspectrum.py +++ b/kloppy/infra/serializers/tracking/secondspectrum.py @@ -1,8 +1,8 @@ import json import logging -from datetime import timedelta +from datetime import datetime, timedelta, timezone import warnings -from typing import Tuple, Dict, Optional, Union, NamedTuple, IO +from typing import Dict, Optional, Union, NamedTuple, IO from lxml import objectify @@ -23,6 +23,7 @@ Player, Provider, PlayerData, + Score, ) from kloppy.utils import Readable, performance_logging @@ -290,16 +291,34 @@ def _iter(): ) orientation = Orientation.NOT_SET + if metadata: + score = Score( + home=metadata["homeScore"], away=metadata["awayScore"] + ) + year, month, day = ( + metadata["year"], + metadata["month"], + metadata["day"], + ) + date = datetime(year, month, day, 0, 0, tzinfo=timezone.utc) + game_id = metadata["ssiId"] + else: + score = None + date = None + game_id = None + metadata = Metadata( teams=teams, periods=periods, pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, - score=None, + score=score, frame_rate=frame_rate, orientation=orientation, provider=Provider.SECONDSPECTRUM, flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_id=game_id, ) return TrackingDataset( diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index 364cc9e1..21999622 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -1,8 +1,8 @@ import logging -from datetime import timedelta +from datetime import timedelta, timezone +from dateutil.parser import parse import warnings -from typing import List, Dict, Tuple, NamedTuple, IO, Optional, Union -from enum import Enum, Flag +from typing import NamedTuple, IO, Optional, Union from collections import Counter import numpy as np import json @@ -340,6 +340,25 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: ) teams = [home_team, away_team] + date = metadata.get("date_time") + if date: + date = parse(date).astimezone(timezone.utc) + + game_id = metadata.get("id") + if game_id: + game_id = str(game_id) + + home_team_coach = metadata.get("home_team_coach") + if home_team_coach is not None: + home_coach = f"{home_team_coach['first_name']} {home_team_coach['last_name']}" + + away_team_coach = metadata.get("away_team_coach") + if away_team_coach is not None: + away_coach = f"{away_team_coach['first_name']} {away_team_coach['last_name']}" + + if game_id: + game_id = str(game_id) + for player_track_obj_id, player in player_dict.items(): team_id = player["team_id"] @@ -439,6 +458,10 @@ def _iter(): provider=Provider.SKILLCORNER, flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM), coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_id=game_id, + home_coach=home_coach, + away_coach=away_coach, ) return TrackingDataset( diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index 038cb3ab..3f418375 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -2,7 +2,8 @@ import warnings from collections import defaultdict from typing import NamedTuple, Optional, Union, IO -from datetime import timedelta +from datetime import timedelta, timezone +from dateutil.parser import parse from lxml import objectify @@ -126,8 +127,15 @@ def deserialize( pitch_length=sportec_metadata.x_max, pitch_width=sportec_metadata.y_max, ) + home_coach = sportec_metadata.home_coach + away_coach = sportec_metadata.away_coach with performance_logging("parse raw data", logger=logger): + date = parse( + match_root.MatchInformation.General.attrib["KickoffTime"] + ).astimezone(timezone.utc) + game_week = match_root.MatchInformation.General.attrib["MatchDay"] + game_id = match_root.MatchInformation.General.attrib["MatchId"] def _iter(): player_map = {} @@ -229,6 +237,11 @@ def _iter(): provider=Provider.SPORTEC, flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_week=game_week, + game_id=game_id, + home_coach=home_coach, + away_coach=away_coach, ) return TrackingDataset( diff --git a/kloppy/infra/serializers/tracking/statsperform.py b/kloppy/infra/serializers/tracking/statsperform.py index 88d47965..ae25eda0 100644 --- a/kloppy/infra/serializers/tracking/statsperform.py +++ b/kloppy/infra/serializers/tracking/statsperform.py @@ -136,6 +136,10 @@ def deserialize(self, inputs: StatsPerformInputs) -> TrackingDataset: for period in meta_data_parser.extract_periods() } teams_list = list(meta_data_parser.extract_lineups()) + score = meta_data_parser.extract_score() + date = meta_data_parser.extract_date() + game_week = meta_data_parser.extract_game_week() + game_id = meta_data_parser.extract_game_id() with performance_logging("Loading tracking data", logger=logger): tracking_data = inputs.raw_data.read().decode("ascii").splitlines() @@ -192,12 +196,15 @@ def _iter(): teams=teams_list, periods=list(periods.values()), pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, - score=None, + score=score, frame_rate=frame_rate, orientation=orientation, provider=Provider.STATSPERFORM, flags=DatasetFlag.BALL_STATE, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_week=game_week, + game_id=game_id, ) return TrackingDataset( diff --git a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py index a878c9e3..0ccf6895 100644 --- a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py +++ b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py @@ -1,8 +1,9 @@ import logging -from datetime import timedelta +from datetime import timedelta, timezone import warnings from typing import Dict, Optional, Union import html +from dateutil.parser import parse from lxml import objectify @@ -169,6 +170,10 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: pitch_size_height = float( match.attrib["fPitchYSizeMeters"].replace(",", ".") ) + date = parse(meta_data.match.attrib["dtDate"]).astimezone( + timezone.utc + ) + game_id = meta_data.match.attrib["iId"] periods = [] for period in match.iterchildren(tag="period"): @@ -269,6 +274,8 @@ def _iter(): provider=Provider.TRACAB, flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, coordinate_system=transformer.get_to_coordinate_system(), + date=date, + game_id=game_id, ) return TrackingDataset( diff --git a/kloppy/tests/files/second_spectrum_fake_metadata.json b/kloppy/tests/files/second_spectrum_fake_metadata.json index 546620f3..18490768 100644 --- a/kloppy/tests/files/second_spectrum_fake_metadata.json +++ b/kloppy/tests/files/second_spectrum_fake_metadata.json @@ -343,7 +343,9 @@ "optaUuid": "plq0s041krxbe84x8t7ic4pwp" } ], - "ssiId": null, + "homeScore": 2, + "awayScore": 1, + "ssiId": "1234456", "optaId": 1234456, "optaUuid": "evpjwmqgrefu1yb27oh74i8yw", "homeSsiId": "zvmye4srxdo1zvczazdppju0a03sado8a74z", diff --git a/kloppy/tests/test_datafactory.py b/kloppy/tests/test_datafactory.py index f11eba29..3494592d 100644 --- a/kloppy/tests/test_datafactory.py +++ b/kloppy/tests/test_datafactory.py @@ -61,6 +61,24 @@ def test_correct_deserialization(self, event_data: str): 2011, 11, 11, 10, 53, 55, 0, timezone.utc ) + # Check enriched metadata + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime( + 2011, 11, 11, 0, 0, tzinfo=timezone.utc + ) + + game_week = dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + assert game_week == "Final" + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "1111111" + assert dataset.events[0].timestamp == timedelta( seconds=3 ) # kickoff first half diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index 0c421891..01f9c0e7 100644 --- a/kloppy/tests/test_helpers.py +++ b/kloppy/tests/test_helpers.py @@ -60,6 +60,9 @@ def _get_tracking_dataset(self): score=None, provider=None, coordinate_system=None, + date="2024-05-19T13:30:00", + game_week="35", + game_id="2374516", ) tracking_data = TrackingDataset( @@ -170,7 +173,7 @@ def test_transform_to_orientation(self): # Create a dataset with the KLOPPY pitch dimensions # and HOME_AWAY orientation original = self._get_tracking_dataset().transform( - to_pitch_dimensions=to_pitch_dimensions, + to_pitch_dimensions=to_pitch_dimensions ) assert original.metadata.orientation == Orientation.HOME_AWAY assert original.frames[0].ball_coordinates == Point3D(x=1, y=0, z=0) diff --git a/kloppy/tests/test_secondspectrum.py b/kloppy/tests/test_secondspectrum.py index b31f7a4a..f640bc25 100644 --- a/kloppy/tests/test_secondspectrum.py +++ b/kloppy/tests/test_secondspectrum.py @@ -1,11 +1,9 @@ -import logging -from datetime import timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path import pytest from kloppy.domain import ( - AttackingDirection, Orientation, Provider, Point, @@ -100,6 +98,24 @@ def test_correct_deserialization( assert pitch_dimensions.y_dim.min == -33.985 assert pitch_dimensions.y_dim.max == 33.985 + # Check enriched metadata + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime( + 1900, 1, 26, 0, 0, tzinfo=timezone.utc + ) + + game_week = dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + assert game_week == "1" + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "1234456" + def test_correct_normalized_deserialization( self, meta_data: Path, raw_data: Path, additional_meta_data: Path ): diff --git a/kloppy/tests/test_skillcorner.py b/kloppy/tests/test_skillcorner.py index a84518fa..35cba16d 100644 --- a/kloppy/tests/test_skillcorner.py +++ b/kloppy/tests/test_skillcorner.py @@ -1,12 +1,10 @@ -from datetime import timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path import pytest from kloppy.domain import ( - Period, Provider, - AttackingDirection, Orientation, Point, Point3D, @@ -115,6 +113,29 @@ def test_correct_deserialization(self, raw_data: Path, meta_data: Path): assert pitch_dimensions.y_dim.min == -34 assert pitch_dimensions.y_dim.max == 34 + # Check enriched metadata + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime( + 2019, 11, 9, 17, 30, 0, tzinfo=timezone.utc + ) + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "2417" + + home_coach = dataset.metadata.home_coach + if home_coach: + assert isinstance(home_coach, str) + assert home_coach == "Hans-Dieter Flick" + + away_coach = dataset.metadata.away_coach + if away_coach: + assert isinstance(away_coach, str) + assert away_coach == "Lucien Favre" + def test_correct_normalized_deserialization( self, meta_data: str, raw_data: str ): diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index e85e5540..3b664f47 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -203,3 +203,38 @@ def test_load_only_alive_frames(self, raw_data: Path, meta_data: Path): only_alive=True, ) assert len(dataset) == 199 + + def test_enriched_metadata(self, raw_data: Path, meta_data: Path): + dataset = sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + only_alive=True, + ) + + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime( + 2020, 6, 5, 18, 30, 0, 210000, tzinfo=timezone.utc + ) + + game_week = dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + assert game_week == "30" + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "DFL-MAT-003BN1" + + home_coach = dataset.metadata.home_coach + if home_coach: + assert isinstance(home_coach, str) + assert home_coach == "C. Streich" + + away_coach = dataset.metadata.away_coach + if away_coach: + assert isinstance(away_coach, str) + assert away_coach == "M. Rose" diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index f602603f..9208e3e6 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from datetime import timedelta +from datetime import timedelta, datetime, timezone from pathlib import Path from typing import cast @@ -73,6 +73,13 @@ def dataset() -> EventDataset: lineup_data=f"{API_URL}/lineups/3794687.json", three_sixty_data=f"{API_URL}/three-sixty/3794687.json", coordinates="statsbomb", + additional_metadata={ + "date": datetime(2020, 8, 23, 0, 0, tzinfo=timezone.utc), + "game_week": "7", + "game_id": "3888787", + "home_coach": "R. Martínez Montoliù", + "away_coach": "F. Fernandes da Costa Santos", + }, ) assert dataset.dataset_type == DatasetType.EVENT return dataset @@ -194,6 +201,32 @@ def test_flags(self, dataset): == DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE ) + def test_enriched_metadata(self, dataset): + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime(2020, 8, 23, 0, 0, tzinfo=timezone.utc) + + game_week = dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + assert game_week == "7" + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "3888787" + + home_coach = dataset.metadata.home_coach + if home_coach: + assert isinstance(home_coach, str) + assert home_coach == "R. Martínez Montoliù" + + away_coach = dataset.metadata.away_coach + if away_coach: + assert isinstance(away_coach, str) + assert away_coach == "F. Fernandes da Costa Santos" + class TestStatsBombEvent: """Generic tests related to deserializing events""" diff --git a/kloppy/tests/test_statsperform.py b/kloppy/tests/test_statsperform.py index 3f69576a..2f2b0487 100644 --- a/kloppy/tests/test_statsperform.py +++ b/kloppy/tests/test_statsperform.py @@ -145,6 +145,22 @@ def test_periods(self, tracking_dataset: TrackingDataset): 2020, 8, 23, 12, 56, 30, tzinfo=timezone.utc ) + def test_enriched_metadata(self, tracking_dataset: TrackingDataset): + date = tracking_dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime(2020, 8, 23, 0, 0, tzinfo=timezone.utc) + + game_week = tracking_dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + assert game_week == "1" + + game_id = tracking_dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "7ijuqohwgmplbxdj1625sxwfe" + class TestStatsPerformEvent: """Tests related to deserializing the MA3 event data feed. @@ -155,10 +171,13 @@ class TestStatsPerformEvent: def test_deserialize_all(self, event_dataset: EventDataset): assert event_dataset.metadata.provider == Provider.STATSPERFORM - assert event_dataset.metadata.coordinate_system == OptaCoordinateSystem( - # StatsPerform does not provide pitch dimensions - pitch_length=None, - pitch_width=None, + assert ( + event_dataset.metadata.coordinate_system + == OptaCoordinateSystem( + # StatsPerform does not provide pitch dimensions + pitch_length=None, + pitch_width=None, + ) ) assert len(event_dataset.records) == 1652 diff --git a/kloppy/tests/test_tracab.py b/kloppy/tests/test_tracab.py index 3298e2ce..192f7166 100644 --- a/kloppy/tests/test_tracab.py +++ b/kloppy/tests/test_tracab.py @@ -1,5 +1,5 @@ from pathlib import Path -from datetime import timedelta +from datetime import datetime, timedelta, timezone import pytest @@ -234,6 +234,22 @@ def test_correct_normalized_deserialization( player_home_1 ].coordinates == Point(x=1.0019047619047619, y=0.49602941176470583) + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime( + 2023, 12, 15, 20, 32, 20, tzinfo=timezone.utc + ) + + game_week = dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "1" + class TestTracabMeta2: def test_correct_deserialization( diff --git a/kloppy/tests/test_wyscout.py b/kloppy/tests/test_wyscout.py index 1a5c4c6a..87b1a0ab 100644 --- a/kloppy/tests/test_wyscout.py +++ b/kloppy/tests/test_wyscout.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path import pytest @@ -75,6 +75,11 @@ def test_metadata(self, dataset: EventDataset): seconds=2863.708369 ) + timedelta(seconds=2999.70982) + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "2499773" + def test_timestamps(self, dataset: EventDataset): kickoff_p1 = dataset.get_event_by_id("190078343") assert kickoff_p1.timestamp == timedelta(seconds=2.643377) @@ -180,6 +185,34 @@ def test_metadata(self, dataset: EventDataset): minutes=20, seconds=47 ) + timedelta(minutes=50, seconds=35) + def test_enriched_metadata(self, dataset: EventDataset): + date = dataset.metadata.date + if date: + assert isinstance(date, datetime) + assert date == datetime( + 2020, 8, 2, 18, 45, tzinfo=timezone.utc + ) + + game_week = dataset.metadata.game_week + if game_week: + assert isinstance(game_week, str) + assert game_week == "38" + + game_id = dataset.metadata.game_id + if game_id: + assert isinstance(game_id, str) + assert game_id == "2852835" + + home_coach = dataset.metadata.home_coach + if home_coach: + assert isinstance(home_coach, str) + assert home_coach == "S. Mihajlović" + + away_coach = dataset.metadata.away_coach + if away_coach: + assert isinstance(away_coach, str) + assert away_coach == "M. Longo" + def test_timestamps(self, dataset: EventDataset): kickoff_p1 = dataset.get_event_by_id(663292348) assert kickoff_p1.timestamp == timedelta(minutes=0, seconds=1)