better project structure

renecotyfanboy · May 11, 2024 · 8569af5 · 8569af5
1 parent fb6e810
commit 8569af5
Show file tree

Hide file tree

Showing 11 changed files with 629 additions and 116 deletions.
diff --git a/dataset_exploration.ipynb b/dataset_exploration.ipynb
diff --git a/discrete_markov_model.ipynb b/discrete_markov_model.ipynb
diff --git a/docs/api/data.md b/docs/api/data.md
@@ -0,0 +1,4 @@
+::: leaguedata.data
+    options:
+      show_root_heading: True
+      show_root_toc_entry: false
diff --git a/docs/api/inference.md b/docs/api/inference.md
@@ -0,0 +1,4 @@
+::: leaguedata.inference
+    options:
+      show_root_heading: True
+      show_root_toc_entry: false
diff --git a/docs/api/model.md b/docs/api/model.md
@@ -0,0 +1,4 @@
+::: leaguedata.model
+    options:
+      show_root_heading: True
+      show_root_toc_entry: false
diff --git a/docs/api/reference.md b/docs/api/reference.md
diff --git a/leaguedata/__init__.py b/leaguedata/__init__.py
diff --git a/data.py → leaguedata/data.py b/data.py → leaguedata/data.py
@@ -1,4 +1,5 @@
 import polars as pl
+import numpy as np
 from datasets import load_dataset
 
 
@@ -41,3 +42,27 @@ def get_tier_sorted() -> list:
                 tier_with_sub.append(f'{tier}_{division}')
 
     return tier_with_sub + ['MASTER', 'GRANDMASTER', 'CHALLENGER']
+
+
+def get_history_dict():
+    """
+    Return a two level dictionary containing the history of all players in the reference sample.
+    Accessed by elo and then by puuid.
+
+    Returns:
+        dict: ``history[elo][puuid]`` is a NumPy array holding the `win` column of that player's
+        games, ordered by `gameStartTimestamp`. Only rows flagged `is_in_reference_sample` are used.
+    """
+
+    columns = ['elo', 'puuid', 'gameStartTimestamp', 'is_in_reference_sample', 'win']
+    df = get_dataset(columns)
+
+    # Filter and sort the frame a single time instead of re-filtering it once per elo and once
+    # per player: the per-player filtering made the cost grow with rows x players.
+    reference_df = df.filter(is_in_reference_sample=True).sort(by='gameStartTimestamp')
+
+    history = {}
+
+    # Grouping keeps the row order inside each group, so every slice is already chronological.
+    # NOTE(review): rows with a null elo/puuid would form their own group here - confirm the
+    # dataset has none, or drop them before grouping.
+    for (elo, puuid), player_df in reference_df.group_by(['elo', 'puuid']):
+        history.setdefault(elo, {})[puuid] = np.asarray(player_df['win'])
+
+    return history
diff --git a/leaguedata/inference.py b/leaguedata/inference.py
@@ -0,0 +1,36 @@
+import numpyro
+import numpyro.distributions as dist
+import jax.numpy as jnp
+import numpy as np
+import tensorflow_probability.substrates.jax.distributions as tfd
+
+
+def numpyro_model(markov_model, observed_data):
+    """
+    NumPyro model function used to infer the probabilities of a Discrete Markov Chain model.
+
+    Two sample sites are declared: 'proba' (the prior) and 'likelihood' (conditioned on the data).
+
+    Parameters:
+        markov_model (DMCModel): The Discrete Markov Chain model to use.
+        observed_data (jnp.array): The observed data to use for inference. Each row (axis 1) is
+            encoded with `markov_model.binary_serie_to_categorical` before being observed.
+    """
+
+    n_probabilities = 2 ** markov_model.n
+
+    if markov_model.is_bernoulli:
+        # A single Uniform(0, 1) draw shared by every entry of the probability vector.
+        shared_proba = numpyro.sample('proba', dist.Uniform(low=0, high=1))
+        proba = shared_proba * jnp.ones(n_probabilities)
+    else:
+        # One independent Uniform(0, 1) prior per entry of the probability vector.
+        flat_prior = dist.Uniform(low=jnp.zeros(n_probabilities), high=jnp.ones(n_probabilities))
+        proba = numpyro.sample('proba', flat_prior)
+
+    transition_matrix = markov_model.build_transition_matrix(proba)
+
+    def transition_fn(_, x):
+        # Row `x` of the transition matrix gives the distribution of the next state.
+        return tfd.Categorical(probs=transition_matrix[x])
+
+    encoded_history = np.apply_along_axis(markov_model.binary_serie_to_categorical, 1, observed_data)
+    initial_state_prior = tfd.Categorical(probs=markov_model.uniform_prior)
+
+    likelihood_dist = tfd.MarkovChain(
+        initial_state_prior=initial_state_prior,
+        transition_fn=transition_fn,
+        num_steps=encoded_history.shape[1],
+    )
+
+    numpyro.sample('likelihood', likelihood_dist, obs=encoded_history)
diff --git a/model.py → leaguedata/model.py b/model.py → leaguedata/model.py
@@ -1,5 +1,8 @@
+import jax.random
 import numpy as np
 import jax.numpy as jnp
+import numpyro.distributions as dist
+from jax.random import PRNGKey
 from tensorflow_probability.substrates.jax import distributions as tfd
 from bidict import bidict
 from itertools import product
@@ -183,3 +186,82 @@ def to_mermaid(self, probs):
                     graph_str += line_str + '\n'
 
         return graph_str
+
+
+def generate_obvious_loser_q(number_of_games=85, number_of_players=200, key=PRNGKey(42)):
+    """
+    Build a mock set of player histories drawn from the obvious loserQ model.
+
+    Parameters:
+        number_of_games (int): The number of games in the mock history.
+        number_of_players (int): The number of players.
+        key (PRNGKey): The key to generate the mock history.
+
+    Returns:
+        The sampled chains decoded row by row with `DMCModel.categorical_serie_to_binary`
+        (one row per player).
+    """
+
+    markov_util_ref = DMCModel(4)
+
+    # Probability attached to a state, looked up from the fraction of 1s among its 4 entries.
+    proba_by_fraction = {0.: 0.25, 0.25: 1 / 3, 0.5: 0.5, 0.75: 2 / 3, 1.: 0.75}
+
+    probs = jnp.empty((2 ** 4))
+
+    for index, state in enumerate(markov_util_ref.get_states()):
+        fraction = sum(state) / 4
+        probs = probs.at[index].set(proba_by_fraction[fraction])
+
+    # NOTE(review): the chain is 3 steps shorter than the requested history, presumably because
+    # each categorical state already spans 4 games - confirm against DMCModel.build_process.
+    process = markov_util_ref.build_process(number_of_games - 3, probs=probs)
+    mock_history_encoded = process.sample(number_of_players, seed=key)
+
+    return np.apply_along_axis(markov_util_ref.categorical_serie_to_binary, 1, mock_history_encoded)
+
+
+def generate_coinflip_history(number_of_games=85, number_of_players=200, key=PRNGKey(42)):
+    """
+    Build a mock set of player histories where every game is an independent fair coin toss.
+
+    Parameters:
+        number_of_games (int): The number of games in the mock history.
+        number_of_players (int): The number of players.
+        key (PRNGKey): The key to generate the mock history.
+
+    Returns:
+        A NumPy array of shape (number_of_players, number_of_games) of Bernoulli(0.5) draws.
+    """
+
+    history_shape = (number_of_players, number_of_games)
+    draws = jax.random.bernoulli(key, p=0.5, shape=history_shape)
+
+    return np.asarray(draws)
+
+
+def generate_nasty_loser_q(number_of_games=85, number_of_players=200, key=PRNGKey(42), return_importance=False):
+    """
+    Build a mock set of player histories drawn from the nasty loserQ model.
+
+    Every player gets an individual `importance` drawn from Beta(1.2, 10); it scales how far the
+    per-state probabilities move away from 0.5.
+
+    Parameters:
+        number_of_games (int): The number of games in the mock history.
+        number_of_players (int): The number of players.
+        key (PRNGKey): The key to generate the mock history.
+        return_importance (bool): Whether to return the importance of the loserQ for each player.
+
+    Returns:
+        The decoded histories, or the tuple (histories, importance) when `return_importance` is set.
+    """
+    markov = DMCModel(4)
+
+    # One sub-key for the importance draws, the other one is split again per player below.
+    importance_key, players_key = jax.random.split(key, 2)
+
+    importance = dist.Beta(1.2, 10).sample(importance_key, sample_shape=(number_of_players,))
+
+    def simulate_player(player_key, player_importance):
+        # Probability attached to a state, looked up from the fraction of 1s among its 4 entries.
+        proba_by_fraction = {0.: 0.5 - 0.375 * player_importance,
+                             0.25: 0.5 - 0.125 * player_importance,
+                             0.5: 0.5,
+                             0.75: 0.5 + 0.125 * player_importance,
+                             1.: 0.5 + 0.375 * player_importance}
+
+        probs = jnp.empty((2 ** 4))
+
+        for index, state in enumerate(markov.get_states()):
+            probs = probs.at[index].set(proba_by_fraction[sum(state) / 4])
+
+        process = markov.build_process(number_of_games - 3, probs=probs)
+
+        return process.sample(1, seed=player_key)[0]
+
+    player_keys = jax.random.split(players_key, number_of_players)
+
+    # Vectorise the single-player simulation over the (key, importance) pairs.
+    history_categorical = np.asarray(jax.vmap(simulate_player)(player_keys, importance))
+
+    history = np.apply_along_axis(markov.categorical_serie_to_binary, 1, history_categorical)
+
+    return (history, importance) if return_importance else history
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -18,7 +18,9 @@ nav:
   - Around trueskill2:
     - trueskill2/introduction.md
   - Reference:
-    - model : api/reference.md
+    - data : api/data.md
+    - model : api/model.md
+    - inference : api/inference.md