-
Notifications
You must be signed in to change notification settings - Fork 1
/
Metrica_IO.py
114 lines (101 loc) · 4.54 KB
/
Metrica_IO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 4 11:18:49 2020
Module for reading in Metrica sample data.
Data can be found at: https://github.com/metrica-sports/sample-data
@author: Laurie Shaw (@EightyFivePoint)
"""
import pandas as pd
import csv as csv
import numpy as np
def read_match_data(DATADIR,gameid):
    '''
    read_match_data(DATADIR,gameid):
    Load every Metrica data set for a single match.
    Returns a 3-tuple: (home tracking data, away tracking data, event data).
    The tracking frames come from tracking_data() and the events from read_event_data().
    '''
    # Home is read before Away, matching the order of the returned tuple
    home_tracking, away_tracking = (
        tracking_data(DATADIR, gameid, side) for side in ('Home', 'Away')
    )
    match_events = read_event_data(DATADIR, gameid)
    return home_tracking, away_tracking, match_events
def read_event_data(DATADIR,game_id):
    '''
    read_event_data(DATADIR,game_id):
    Read the Metrica raw event file for game_id and return its contents as a DataFrame.
    The file is looked up at <DATADIR>/Sample_Game_<id>/Sample_Game_<id>_RawEventsData.csv
    '''
    game_label = 'Sample_Game_%d' % game_id
    # path of the event file relative to DATADIR (leading separator kept as in the data layout string)
    relative_path = '/{0}/{0}_RawEventsData.csv'.format(game_label)
    return pd.read_csv('{}/{}'.format(DATADIR, relative_path))
def tracking_data(DATADIR,game_id,teamname):
    '''
    tracking_data(DATADIR,game_id,teamname):
    read Metrica tracking data for game_id and return as a DataFrame.
    teamname is the name of the team in the filename. For the sample data this is either 'Home' or 'Away'.

    The first three rows of the file are treated as headers: row 1 supplies the team name
    (4th field), row 2 the player jersey numbers and row 3 the raw column labels.
    Player columns are renamed to '<teamname>_<jersey>_x' / '<teamname>_<jersey>_y', the last two
    columns become 'ball_x' / 'ball_y', and the returned DataFrame is indexed by the 'Frame' column.
    '''
    teamfile = '/Sample_Game_%d/Sample_Game_%d_RawTrackingData_%s_Team.csv' % (game_id,game_id,teamname)
    filepath = '{}/{}'.format(DATADIR, teamfile)
    # First: deal with file headers so that we can get the player names correct.
    # The 'with' block guarantees the file handle is closed once the three header rows are read;
    # newline='' is the mode the csv module asks for when handing it a file object.
    with open(filepath, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        teamnamefull = next(reader)[3].lower()
        print("Reading team: %s" % teamnamefull)
        # construct column names
        jerseys = [x for x in next(reader) if x != ''] # extract player jersey numbers from second row
        columns = next(reader)
    for i, j in enumerate(jerseys): # create x & y position column headers for each player
        # player columns start at position 3 and come in (x, y) pairs
        columns[i*2+3] = "{}_{}_x".format(teamname, j)
        columns[i*2+4] = "{}_{}_y".format(teamname, j)
    columns[-2] = "ball_x" # column headers for the x & y positions of the ball
    columns[-1] = "ball_y"
    # Second: read in tracking data and place into pandas Dataframe (skipping the three header rows)
    tracking = pd.read_csv(filepath, names=columns, index_col='Frame', skiprows=3)
    return tracking
def merge_tracking_data(home,away):
    '''
    Combine the home & away tracking DataFrames into one, joined on their indexes.
    The ball columns are removed from the home frame first, so the merged frame carries
    a single 'ball_x' / 'ball_y' pair (the one from the away frame).
    '''
    home_without_ball = home.drop(columns=['ball_x', 'ball_y'])
    merged = home_without_ball.merge(away, left_index=True, right_index=True)
    return merged
def to_metric_coordinates(data,field_dimen=(106.,68.) ):
    '''
    Rescale positions from Metrica units to meters, with the origin moved to the centre of the pitch.
    Every column whose name ends in 'x' or 'y' (case-insensitive) is converted.
    field_dimen is (length, width) of the pitch in meters.
    The DataFrame is modified in place and also returned.
    '''
    x_columns, y_columns = [], []
    for col in data.columns:
        last_char = col[-1].lower()
        if last_char == 'x':
            x_columns.append(col)
        elif last_char == 'y':
            y_columns.append(col)
    data[x_columns] = ( data[x_columns]-0.5 ) * field_dimen[0]
    # ------------ ***NOTE*** ------------
    # Metrica actually define the origin at the *top*-left of the field, not the bottom-left,
    # as discussed in the YouTube video. The y conversion below is therefore multiplied by -1.
    # It was originally:
    #     data[y_columns] = ( data[y_columns]-0.5 ) * field_dimen[1]
    # ------------ ********** ------------
    data[y_columns] = -1 * ( data[y_columns]-0.5 ) * field_dimen[1]
    return data
def to_single_playing_direction(home,away,events):
    '''
    Flip coordinates in second half so that each team always shoots in the same direction through the match.

    For each of the three DataFrames, the first row holding the highest 'Period' value is located
    and every column whose name ends in 'x' or 'y' (case-insensitive) is multiplied by -1 from that
    row label to the end of the frame.
    A frame with no rows, or with no Period of 2 or more, has no second half and is left unchanged.
    The DataFrames are modified in place and also returned as (home, away, events).
    NOTE: the flip is a label-based slice, so it presumably relies on rows being in chronological order.
    '''
    for team in (home, away, events):
        periods = team['Period']
        # max() of an empty column is NaN and NaN >= 2 is False, so empty frames are skipped too
        if not periods.max() >= 2:
            continue
        # idxmax() gives the label of the first occurrence of the highest period
        second_half_idx = periods.idxmax()
        columns = [c for c in team.columns if c[-1].lower() in ('x', 'y')]
        team.loc[second_half_idx:,columns] *= -1
    return home,away,events
def find_playing_direction(team,teamname):
    '''
    Work out which way the team is playing, from the goalkeeper's x position in the first row of team.
    Returns +1 for left->right and -1 for right->left (the negated sign of that x position).
    '''
    goalkeeper_jersey = find_goalkeeper(team)
    gk_x_label = "_".join([teamname, goalkeeper_jersey, "x"])
    kickoff_row = team.iloc[0]
    # goalkeeper at negative x means the team attacks towards positive x, hence the sign flip
    return -np.sign(kickoff_row[gk_x_label])
def find_goalkeeper(team):
    '''
    Find the goalkeeper in team, identifying him/her as the player closest to goal at kick off.

    Looks at the first row of team, takes the player x-position columns (names ending in '_x' and
    starting with 'Home' or 'Away') and picks the one with the largest absolute value.
    Returns the jersey part of that column name (the text between the first and second '_').
    Raises ValueError if team has no such player x-position columns.
    NOTE: largest |x| only means "closest to goal" if x is measured from the centre of the pitch -
    presumably to_metric_coordinates() has been applied first; confirm against callers.
    '''
    x_columns = [c for c in team.columns if c[-2:].lower()=='_x' and c[:4] in ['Home','Away']]
    if not x_columns:
        raise ValueError("no 'Home'/'Away' player x-position columns found in tracking data")
    # the first row is a Series, so a plain idxmax() returns the column label of the largest |x|
    kickoff_x = team.iloc[0][x_columns]
    GK_col = kickoff_x.abs().idxmax()
    return GK_col.split('_')[1]