Skip to content

Commit

Permalink
better project structure
Browse files Browse the repository at this point in the history
  • Loading branch information
renecotyfanboy committed May 11, 2024
1 parent fb6e810 commit 8569af5
Show file tree
Hide file tree
Showing 11 changed files with 629 additions and 116 deletions.
150 changes: 149 additions & 1 deletion dataset_exploration.ipynb

Large diffs are not rendered by default.

427 changes: 322 additions & 105 deletions discrete_markov_model.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions docs/api/data.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
::: leaguedata.data
options:
show_root_heading: True
show_root_toc_entry: false
4 changes: 4 additions & 0 deletions docs/api/inference.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
::: leaguedata.inference
options:
show_root_heading: True
show_root_toc_entry: false
4 changes: 4 additions & 0 deletions docs/api/model.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
::: leaguedata.model
options:
show_root_heading: True
show_root_toc_entry: false
9 changes: 0 additions & 9 deletions docs/api/reference.md

This file was deleted.

Empty file added leaguedata/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions data.py → leaguedata/data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import polars as pl
import numpy as np
from datasets import load_dataset


Expand Down Expand Up @@ -41,3 +42,27 @@ def get_tier_sorted() -> list:
tier_with_sub.append(f'{tier}_{division}')

return tier_with_sub + ['MASTER', 'GRANDMASTER', 'CHALLENGER']


def get_history_dict():
    """
    Build the win history of every player flagged as part of the reference sample.

    The dataset is loaded through `get_dataset`, restricted to the columns needed here.
    The result is a two level dictionary: the first key is the elo, the second key is the
    player's puuid, and the value is a NumPy array holding that player's `win` column
    sorted by `gameStartTimestamp`.
    """

    needed_columns = ['elo', 'puuid', 'gameStartTimestamp', 'is_in_reference_sample', 'win']
    dataset = get_dataset(needed_columns)
    reference_elos = dataset.filter(is_in_reference_sample=True)['elo'].unique()

    history = {}

    for elo in reference_elos:
        # Rows of the reference sample belonging to this elo only.
        elo_rows = dataset.filter(elo=elo, is_in_reference_sample=True)

        players = {}
        for puuid in elo_rows['puuid'].unique():
            ordered_games = elo_rows.filter(puuid=puuid).sort(by='gameStartTimestamp')
            players[puuid] = np.asarray(ordered_games['win'])

        history[elo] = players

    return history
36 changes: 36 additions & 0 deletions leaguedata/inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpyro
import numpyro.distributions as dist
import jax.numpy as jnp
import numpy as np
import tensorflow_probability.substrates.jax.distributions as tfd


def numpyro_model(markov_model, observed_data):
    """
    NumPyro model performing inference on the Discrete Markov Chain model.

    Registers a `proba` sample site (the probabilities handed to
    `markov_model.build_transition_matrix`) and a `likelihood` site observed on the
    categorically encoded history.

    Parameters:
        markov_model (DMCModel): The Discrete Markov Chain model to use.
        observed_data (jnp.array): The observed data to use for inference.
    """

    n_probs = 2 ** markov_model.n

    if markov_model.is_bernoulli:
        # A single Uniform(0, 1) draw, repeated over all entries.
        proba = numpyro.sample('proba', dist.Uniform(low=0, high=1)) * jnp.ones(n_probs)
    else:
        # One independent Uniform(0, 1) draw per entry.
        proba = numpyro.sample('proba',
                               dist.Uniform(low=jnp.zeros(n_probs), high=jnp.ones(n_probs)))

    transition_matrix = markov_model.build_transition_matrix(proba)

    # Each row of the observations is encoded independently (axis 1).
    encoded_history = np.apply_along_axis(markov_model.binary_serie_to_categorical, 1, observed_data)

    def transition_fn(_, state):
        # The next-state distribution is the row of the matrix indexed by the current state.
        return tfd.Categorical(probs=transition_matrix[state])

    chain = tfd.MarkovChain(
        initial_state_prior=tfd.Categorical(probs=markov_model.uniform_prior),
        transition_fn=transition_fn,
        num_steps=encoded_history.shape[1],
    )

    numpyro.sample('likelihood', chain, obs=encoded_history)
82 changes: 82 additions & 0 deletions model.py → leaguedata/model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import jax.random
import numpy as np
import jax.numpy as jnp
import numpyro.distributions as dist
from jax.random import PRNGKey
from tensorflow_probability.substrates.jax import distributions as tfd
from bidict import bidict
from itertools import product
Expand Down Expand Up @@ -183,3 +186,82 @@ def to_mermaid(self, probs):
graph_str += line_str + '\n'

return graph_str


def generate_obvious_loser_q(number_of_games=85, number_of_players=200, key=PRNGKey(42)):
    """
    Generate mock history of players using the obvious loserQ model.

    Every state of a `DMCModel(4)` gets a fixed probability chosen from the sum of the
    state's entries divided by 4; the resulting process is sampled once per player and
    decoded with `categorical_serie_to_binary`.

    Parameters:
        number_of_games (int): The number of games in the mock history.
        number_of_players (int): The number of players.
        key (PRNGKey): The key to generate the mock history.
    """

    model = DMCModel(4)

    # Probability assigned to a state, keyed by sum(state) / 4.
    prob_by_fraction = {0.: 0.25, 0.25: 1 / 3, 0.5: 0.5, 0.75: 2 / 3, 1.: 0.75}

    probs = jnp.empty((2 ** 4))
    for index, state in enumerate(model.get_states()):
        probs = probs.at[index].set(prob_by_fraction[sum(state) / 4])

    # NOTE(review): the process runs for number_of_games - 3 steps; presumably the binary
    # decoding yields 3 more entries than the categorical serie -- confirm in DMCModel.
    process = model.build_process(number_of_games - 3, probs=probs)
    encoded_history = process.sample(number_of_players, seed=key)

    return np.apply_along_axis(model.categorical_serie_to_binary, 1, encoded_history)


def generate_coinflip_history(number_of_games=85, number_of_players=200, key=PRNGKey(42)):
    """
    Generate mock history of players using the coinflip model.

    Every game of every player is an independent Bernoulli draw with probability 0.5.
    The result is a boolean NumPy array of shape (number_of_players, number_of_games).

    Parameters:
        number_of_games (int): The number of games in the mock history.
        number_of_players (int): The number of players.
        key (PRNGKey): The key to generate the mock history.
    """

    history_shape = (number_of_players, number_of_games)
    draws = jax.random.bernoulli(key, 0.5, shape=history_shape)

    return np.asarray(draws)


def generate_nasty_loser_q(number_of_games=85, number_of_players=200, key=PRNGKey(42), return_importance=False):
    """
    Generate mock history of players using the nasty loserQ model.

    Each player gets an importance drawn from Beta(1.2, 10). That importance scales how far
    the probability attached to each state of a `DMCModel(4)` moves away from 0.5, and one
    history is then sampled per player with its own key.

    Parameters:
        number_of_games (int): The number of games in the mock history.
        number_of_players (int): The number of players.
        key (PRNGKey): The key to generate the mock history.
        return_importance (bool): Whether to return the importance of the loserQ for each player.
    """
    model = DMCModel(4)

    # One sub-key for the importances, one to derive the per-player sampling keys.
    importance_key, history_key = jax.random.split(key, 2)

    importance = dist.Beta(1.2, 10).sample(importance_key, sample_shape=(number_of_players,))

    def simulate_player(player_key, player_importance):
        # Probability assigned to a state, keyed by sum(state) / 4.
        prob_by_fraction = {0.: 0.5 - 0.375 * player_importance,
                            0.25: 0.5 - 0.125 * player_importance,
                            0.5: 0.5,
                            0.75: 0.5 + 0.125 * player_importance,
                            1.: 0.5 + 0.375 * player_importance}

        probs = jnp.empty((2 ** 4))
        for index, state in enumerate(model.get_states()):
            probs = probs.at[index].set(prob_by_fraction[sum(state) / 4])

        # NOTE(review): number_of_games - 3 steps, presumably because the binary decoding
        # yields 3 more entries than the categorical serie -- confirm in DMCModel.
        process = model.build_process(number_of_games - 3, probs=probs)
        return process.sample(1, seed=player_key)[0]

    player_keys = jax.random.split(history_key, number_of_players)
    history_categorical = np.asarray(jax.vmap(simulate_player)(player_keys, importance))

    history = np.apply_along_axis(model.categorical_serie_to_binary, 1, history_categorical)

    return (history, importance) if return_importance else history
4 changes: 3 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ nav:
- Around trueskill2:
- trueskill2/introduction.md
- Reference:
- model : api/reference.md
- data : api/data.md
- model : api/model.md
- inference : api/inference.md



Expand Down

0 comments on commit 8569af5

Please sign in to comment.