-
Notifications
You must be signed in to change notification settings - Fork 12
/
elo_vs_surface_elo.py
47 lines (37 loc) · 1.33 KB
/
elo_vs_surface_elo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# -*- coding: utf-8 -*-
import altair as alt
import pandas as pd
## Elo ratings available here: http://tennisabstract.com/reports/wta_elo_ratings.html
## csv is reduced to one column each for overall Elo and "GrassRaw" Elo
## note: grass Elo is based only on grass-court results
## (code to generate ratings is not public)
## Load the reduced ratings table. Columns used below: Player, Elo, GrassRaw
## and still_in (a flag added by hand to the input csv; 1 = still in the draw).
df = pd.read_csv('data/20210705_wta_gElo.csv')
## astype() returns a converted copy instead of modifying df in place, so the
## result must be assigned back for the casts to take effect.
df = df.astype({
    'Elo': 'float',
    'GrassRaw': 'float',
    'still_in': 'int',
})
## label only players remaining in the tournament; every other row gets an
## empty string so no text is drawn next to its point
df['label'] = df['Player'].where(df['still_in'] == 1, '')
## The axis domains are hard-coded (setting them from the data would be
## better); the X domain extends a bit past the high end to leave room
## for a label.
elo_axis = alt.X(
    'Elo:Q',
    scale=alt.Scale(domain=(1300,2350)),
    axis=alt.Axis(title='Overall Elo'),
)
grass_axis = alt.Y(
    'GrassRaw:Q',
    scale=alt.Scale(domain=(1200,1900)),
    axis=alt.Axis(title='Grass-Only Elo'),
)
## colour by the still_in flag, with the legend suppressed
status_color = alt.Color('still_in', legend=None)

## scatter layer: one circle per row of df
scatter = alt.Chart(df).mark_circle(size=60)
points = scatter.encode(elo_axis, grass_axis, color=status_color)

## text layer: derived from the scatter layer so it shares its encodings,
## then draws the 'label' column shifted 7px to the right of each point
label_marks = points.mark_text(align='left', baseline='middle', dx=7)
text = label_marks.encode(text='label')

## overlay the two layers and write the chart to an html file
combined = points + text
combined.save('output/wta_elo_vs_gElo.html')