Skip to content

Commit

Permalink
Enriched metadata with date, game_week and game_id
Browse files Browse the repository at this point in the history
  • Loading branch information
SportsDynamicsDS committed Jul 17, 2024
1 parent d001eb0 commit e07822c
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 6 deletions.
6 changes: 6 additions & 0 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,9 @@ class Metadata:
orientation: See [`Orientation`][kloppy.domain.models.common.Orientation]
flags:
provider: See [`Provider`][kloppy.domain.models.common.Provider]
date: Date of the game.
game_week: Game week or stage of the game.
game_id: Game id of the game from the provider.
"""

teams: List[Team]
Expand All @@ -1014,6 +1017,9 @@ class Metadata:
coordinate_system: CoordinateSystem
score: Optional[Score] = None
frame_rate: Optional[float] = None
date: Optional[str] = None
game_week: Optional[str] = None
game_id: Optional[str] = None
attributes: Optional[Dict] = field(default_factory=dict, compare=False)

def __post_init__(self):
Expand Down
6 changes: 6 additions & 0 deletions kloppy/infra/serializers/event/statsperform/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
periods = metadata_parser.extract_periods()
score = metadata_parser.extract_score()
teams = metadata_parser.extract_lineups()
date = events_parser.extract_date()
game_week = events_parser.extract_game_week()
game_id = events_parser.extract_game_id()
raw_events = [
event
for event in events_parser.extract_events()
Expand Down Expand Up @@ -827,6 +830,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
if inputs.event_feed.upper() == "F24"
else Provider.STATSPERFORM,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
13 changes: 12 additions & 1 deletion kloppy/infra/serializers/event/statsperform/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from datetime import datetime
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
Expand Down Expand Up @@ -53,6 +52,18 @@ def extract_score(self) -> Optional[Score]:
"""Return the score of the game."""
return None

def extract_date(self) -> Optional[str]:
    """Extract the date of the game.

    Default implementation: no date is available at this level, so the
    result is always ``None``.

    Returns:
        ``None``.
    """
    return None

def extract_game_week(self) -> Optional[str]:
    """Extract the game week of the game.

    Default implementation: no game week is available at this level, so
    the result is always ``None``.

    Returns:
        ``None``.
    """
    return None

def extract_game_id(self) -> Optional[str]:
    """Extract the game id of the game.

    Default implementation: no game id is available at this level, so the
    result is always ``None``.

    Returns:
        ``None``.
    """
    return None

def extract_lineups(self) -> Tuple[Team, Team]:
    """Extract the home team and the away team.

    This implementation provides no lineups and unconditionally raises.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
Expand Down
27 changes: 25 additions & 2 deletions kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""XML parser for Opta F24 feeds."""
import pytz
from datetime import datetime
from typing import List
from typing import List, Optional

from kloppy.domain import Period
from .base import OptaXMLParser, OptaEvent


Expand Down Expand Up @@ -53,3 +52,27 @@ def extract_events(self) -> List[OptaEvent]:
)
for event in game_elm.iterchildren("Event")
]

def extract_date(self) -> Optional[str]:
    """Return the date of the game.

    Reads the ``game_date`` attribute of the ``Game`` element found
    directly under ``self.root``.

    Returns:
        The raw ``game_date`` attribute value, or ``None`` when there is
        no ``Game`` element or it carries no ``game_date`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: an XML element without child nodes
    # is falsy, so a plain truthiness test would wrongly treat an existing
    # Game element as missing (and element truthiness is deprecated).
    if game_elm is None:
        return None
    return game_elm.attrib.get("game_date")

def extract_game_week(self) -> Optional[str]:
    """Return the game_week of the game.

    Reads the ``matchday`` attribute of the ``Game`` element found
    directly under ``self.root``.

    Returns:
        The raw ``matchday`` attribute value, or ``None`` when there is
        no ``Game`` element or it carries no ``matchday`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: an XML element without child nodes
    # is falsy, so a plain truthiness test would wrongly treat an existing
    # Game element as missing (and element truthiness is deprecated).
    if game_elm is None:
        return None
    return game_elm.attrib.get("matchday")

def extract_game_id(self) -> Optional[str]:
    """Return the game_id of the game.

    Reads the ``id`` attribute of the ``Game`` element found directly
    under ``self.root``.

    Returns:
        The raw ``id`` attribute value, or ``None`` when there is no
        ``Game`` element or it carries no ``id`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: an XML element without child nodes
    # is falsy, so a plain truthiness test would wrongly treat an existing
    # Game element as missing (and element truthiness is deprecated).
    if game_elm is None:
        return None
    return game_elm.attrib.get("id")
29 changes: 28 additions & 1 deletion kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ def extract_periods(self) -> List[Period]:
return parsed_periods

def extract_score(self) -> Optional[Score]:
    """Return the total score of the game.

    Reads ``liveData -> matchDetails -> scores -> total`` from
    ``self.root`` and builds a ``Score`` from its ``home`` and ``away``
    entries.

    Returns:
        A ``Score`` with the home and away totals, or ``None`` when any
        level of that path is absent or either total is missing.
    """
    # Walk the path defensively: a feed without score information must
    # yield None rather than raise KeyError from chained indexing.
    live_data = self.root.get("liveData") or {}
    match_details = live_data.get("matchDetails") or {}
    scores = match_details.get("scores") or {}
    total = scores.get("total") or {}
    home_score = total.get("home")
    away_score = total.get("away")
    if home_score is None or away_score is None:
        return None
    return Score(home=home_score, away=away_score)

def extract_lineups(self) -> Tuple[Team, Team]:
teams = {}
Expand Down Expand Up @@ -76,6 +82,27 @@ def extract_lineups(self) -> Tuple[Team, Team]:
raise DeserializationError("Lineup incomplete")
return home_team, away_team

def extract_date(self) -> Optional[str]:
    """Look up the date of the game in the ``matchInfo`` section.

    Returns:
        The value stored under ``matchInfo -> date`` in ``self.root``, or
        ``None`` when either key is absent.
    """
    root = self.root
    if "matchInfo" not in root:
        return None
    match_info = root["matchInfo"]
    if "date" not in match_info:
        return None
    return match_info["date"]

def extract_game_week(self) -> Optional[str]:
    """Look up the game week of the game in the ``matchInfo`` section.

    Returns:
        The value stored under ``matchInfo -> week`` in ``self.root``, or
        ``None`` when either key is absent.
    """
    root = self.root
    if "matchInfo" not in root:
        return None
    match_info = root["matchInfo"]
    if "week" not in match_info:
        return None
    return match_info["week"]

def extract_game_id(self) -> Optional[str]:
    """Look up the game id of the game in the ``matchInfo`` section.

    Returns:
        The value stored under ``matchInfo -> id`` in ``self.root``, or
        ``None`` when either key is absent.
    """
    root = self.root
    if "matchInfo" not in root:
        return None
    match_info = root["matchInfo"]
    if "id" not in match_info:
        return None
    return match_info["id"]

def _parse_teams(self) -> List[Dict[str, Any]]:
parsed_teams = []
match_info = self.root["matchInfo"]
Expand Down
15 changes: 14 additions & 1 deletion kloppy/infra/serializers/tracking/secondspectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Player,
Provider,
PlayerData,
Score
)

from kloppy.utils import Readable, performance_logging
Expand Down Expand Up @@ -290,16 +291,28 @@ def _iter():
)
orientation = Orientation.NOT_SET

if metadata:
score = Score(home=metadata["homeScore"], away=metadata["awayScore"])
year, month, day = metadata["year"], metadata["month"], metadata["day"]
date = f"{year}-{month}-{day}"
game_id = metadata["ssiId"]
else:
score = None
date = None
game_id = None

metadata = Metadata(
teams=teams,
periods=periods,
pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
score=None,
score=score,
frame_rate=frame_rate,
orientation=orientation,
provider=Provider.SECONDSPECTRUM,
flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_id=game_id,
)

return TrackingDataset(
Expand Down
3 changes: 3 additions & 0 deletions kloppy/infra/serializers/tracking/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ def _iter():
provider=Provider.SPORTEC,
flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
coordinate_system=transformer.get_to_coordinate_system(),
date=match_root.MatchInformation.General.attrib["KickoffTime"],
game_week=match_root.MatchInformation.General.attrib["MatchDay"],
game_id=match_root.MatchInformation.General.attrib["MatchId"],
)

return TrackingDataset(
Expand Down
9 changes: 8 additions & 1 deletion kloppy/infra/serializers/tracking/statsperform.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ def deserialize(self, inputs: StatsPerformInputs) -> TrackingDataset:
for period in meta_data_parser.extract_periods()
}
teams_list = list(meta_data_parser.extract_lineups())
score = meta_data_parser.extract_score()
date = meta_data_parser.extract_date()
game_week = meta_data_parser.extract_game_week()
game_id = meta_data_parser.extract_game_id()

with performance_logging("Loading tracking data", logger=logger):
tracking_data = inputs.raw_data.read().decode("ascii").splitlines()
Expand Down Expand Up @@ -192,12 +196,15 @@ def _iter():
teams=teams_list,
periods=list(periods.values()),
pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
score=None,
score=score,
frame_rate=frame_rate,
orientation=orientation,
provider=Provider.STATSPERFORM,
flags=DatasetFlag.BALL_STATE,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return TrackingDataset(
Expand Down
2 changes: 2 additions & 0 deletions kloppy/infra/serializers/tracking/tracab/tracab_dat.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ def _iter():
provider=Provider.TRACAB,
flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
coordinate_system=transformer.get_to_coordinate_system(),
date=meta_data.match.attrib["dtDate"],
game_id=meta_data.match.attrib["iId"],
)

return TrackingDataset(
Expand Down
3 changes: 3 additions & 0 deletions kloppy/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ def _get_tracking_dataset(self):
score=None,
provider=None,
coordinate_system=None,
date="2024-05-19T13:30:00",
game_week="35",
game_id="2374516"
)

tracking_data = TrackingDataset(
Expand Down

0 comments on commit e07822c

Please sign in to comment.