Skip to content

Commit

Permalink
Scores are in each file, along with values
Browse files Browse the repository at this point in the history
  • Loading branch information
MLDERES committed Aug 21, 2020
1 parent 3398072 commit ce7f75b
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 31 deletions.
4 changes: 3 additions & 1 deletion conf/base/scoring.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ full_ppr:
half_ppr:
<<: *standard_scoring
rcv_rec: 0.5


custom:
<<: *standard_scoring
30 changes: 26 additions & 4 deletions notebooks/analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
"proj_path = current_dir.parent # point back to the root of the project\n",
"context = load_context(proj_path)\n",
"catalog = context.catalog\n",
"\n",
"from phantasyfootballer.settings import *\n",
"from phantasyfootballer.common import *"
"\n"
]
},
{
Expand All @@ -37,8 +35,32 @@
},
"outputs": [],
"source": [
"from phantasyfootballer.settings import *\n",
"from phantasyfootballer.common import Stats\n",
"df_ppr = catalog.load('scoring.ppr')\n",
"df_ppr.head()"
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ppr.sort_values(Stats.POS_RANK)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
"df_ppr[Stats.POS_RANK] = df_ppr.groupby(POSITION)[Stats.FANTASY_POINTS].rank(na_option=\"bottom\", ascending=False)\n",
"df_ppr.sort_values(Stats.POS_RANK, ascending=True)"
]
},
{
Expand Down
42 changes: 42 additions & 0 deletions notebooks/sample.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,48 @@
"df_right['NEW_COL'] = 'new_data'\n",
"combine_data_horizontal(df_left,df_right)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from phantasyfootballer.settings import *\n",
"from phantasyfootballer.common import Stats\n",
"data = context.catalog.load('scoring.ppr')\n",
"#data.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"pos_data = data.groupby(POSITION)[Stats.FANTASY_POINTS].mean()\n",
"joined = data.join(pos_data,on=POSITION,rsuffix='avg')\n",
"data[Stats.PCT_AVERAGE] = joined[Stats.FANTASY_POINTS]/joined['fpavg']\n",
"joined.head()\n",
"data.head()"
]
}
],
"metadata": {
Expand Down
21 changes: 18 additions & 3 deletions src/phantasyfootballer/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,15 @@ class Stats():
FP_HALF = "fp_hppr"
FP_FULL = "fp_ppr"
FANTASY_POINTS = "fp"

ALL_STATS = [
RANK = 'overall_rank'
PCT_TYPICAL_POS = 'percent_typical_position'
PCT_MEAN_POS = 'percent_average_position'
PCT_MEDIAN_POS = 'percent_median_position'
PCT_TYPICAL_OVR = 'percent_typical_overall'
PCT_MEAN_OVR = 'percent_average_overall'
PCT_MEDIAN_OVR= 'percent_median_overall'

ALL_STATS= [
PASS_ATT,
PASS_COMP,
PASS_YDS,
Expand All @@ -87,7 +94,15 @@ class Stats():
DST_SAFE,
DST_PA,
MISC_FL,
FANTASY_POINTS
RANK,
POS_RANK,
FANTASY_POINTS,
PCT_TYPICAL_POS,
PCT_MEAN_POS,
PCT_MEDIAN_POS,
PCT_TYPICAL_OVR,
PCT_MEAN_OVR,
PCT_MEDIAN_OVR
]

@staticmethod
Expand Down
49 changes: 28 additions & 21 deletions src/phantasyfootballer/pipelines/data_engineering/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,8 @@
from kedro.config import ConfigLoader
from functools import reduce, partial, update_wrapper


String_or_List = Union[str, List[str]]


def normalize_data_source(data: pd.DataFrame, stat_name: str, common_stats: Dict[str, Any]) -> pd.DataFrame:
    """
    Normalize a provider's data source so that its stats use the common stat column names.

    Additionally, if there is a stat that is common to the entire dataset (e.g. NFL week, NFL year,
    all QBs) and that isn't already part of the file, it is meant to be set as well.
    Say, for instance, that the provider returns a file called 2019_passing_stats. The column
    NFL Year is not likely to be included, so it can be added by specifying it in the
    ``common_stats`` dictionary.
    The mapping from a provider column name to the common name is taken from
    conf/project/parameters.yml

    NOTE: this node is a placeholder; it has no implementation yet and currently returns None.

    Args:
        data (pd.DataFrame): the raw data as supplied by the provider
        stat_name (str): name of the stat set being normalized
        common_stats (Dict[str, Any]): values that apply to every row of the dataset

    Returns:
        pd.DataFrame: intended to be the normalized dataframe (nothing is returned yet)
    """
    pass



def _craft_scoring_dict(scheme: str) -> Dict[str, Any]:
"""
Look up the scoring system in the scoring.yml file
Expand Down Expand Up @@ -57,7 +40,7 @@ def _calculate_projected_points(scoring: String_or_List, data: pd.DataFrame) ->
for c in data.columns:
if (m := score_map.get(c)) :
df_pts[c + "_pts"] = data[c] * m
data[FANTASY_POINTS] = round(df_pts.sum(axis=1), 2)
data[Stats.FANTASY_POINTS] = round(df_pts.sum(axis=1), 2)

return data

Expand Down Expand Up @@ -99,24 +82,44 @@ def average_stats_by_player(*dataframes: Sequence[pd.DataFrame]) -> pd.DataFrame
# Pull all the dataframes into a single one
df_all = pd.concat(dataframes)
# Get the mean keeping the columns that matter
df_all = df_all.groupby(["player", "team", "position"]).mean().fillna(0)
df_all = df_all.groupby([PLAYER_NAME, TEAM, POSITION]).mean().fillna(0)
# Drop all the players where they have 0 projections
df_all = df_all[df_all.sum(axis=1) > 0].reset_index()
# Drop positions we don't care about
df_all = df_all.query('position in ["QB","RB","TE","WR","DST"]').reset_index(drop=True)
return df_all

def calculate_player_rank(data: pd.DataFrame) -> pd.DataFrame:
    """
    Rank players by projected fantasy points, both overall and within their position.

    Args:
        data (pd.DataFrame): dataframe of player stats including projected fantasy points

    Returns:
        pd.DataFrame: the same dataframe with two added columns, the overall rank and the rank by position
    """
    # Highest score gets rank 1; missing scores are pushed to the bottom of the ranking
    rank_options = {"na_option": "bottom", "ascending": False}

    points = data[Stats.FANTASY_POINTS]
    data[Stats.RANK] = points.rank(**rank_options)

    points_by_position = data.groupby(POSITION)[Stats.FANTASY_POINTS]
    data[Stats.POS_RANK] = points_by_position.rank(**rank_options)
    return data

def percent_mean(data: pd.DataFrame) -> pd.DataFrame:
    """
    Express each player's projected points as a fraction of the mean projected points.

    Two columns are added to ``data``:

    * ``Stats.PCT_MEAN_POS`` - the player's fantasy points divided by the mean fantasy points of the
      players in the same position
    * ``Stats.PCT_MEAN_OVR`` - the player's fantasy points divided by the mean fantasy points of
      every player in the dataframe

    Args:
        data (pd.DataFrame): the dataframe that has the players and a single scoring scheme

    Returns:
        pd.DataFrame: the same dataframe, updated with the two percentage columns
    """
    points = data[Stats.FANTASY_POINTS]
    # transform("mean") broadcasts each position's mean back onto the player rows. This avoids joining
    # the means and reading them back through a hand-built column name ('fpavg'), which only resolved
    # while the fantasy points column happened to be called "fp".
    position_mean = data.groupby(POSITION)[Stats.FANTASY_POINTS].transform("mean")
    data[Stats.PCT_MEAN_POS] = points / position_mean
    data[Stats.PCT_MEAN_OVR] = points / points.mean()
    return data


Expand All @@ -131,6 +134,10 @@ def percent_typical(data: pd.DataFrame) -> pd.DataFrame:
pd.DataFrame: updated dataframe with a column that has identified the value of a player
relative to the typical player in his position
"""
pos_data = data.groupby(POSITION)[Stats.FANTASY_POINTS].median()
joined = data.join(pos_data,on=POSITION,rsuffix='_med')
data[Stats.PCT_MEDIAN_POS] = joined[Stats.FANTASY_POINTS]/joined['fp_med']
data[Stats.PCT_MEDIAN_OVR] = data[Stats.FANTASY_POINTS]/(data[Stats.FANTASY_POINTS].median())
return data


Expand Down
21 changes: 19 additions & 2 deletions src/phantasyfootballer/pipelines/data_engineering/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@
)
]
)
# Scoring pipeline for the "custom" scoring scheme. It has a single node: the callable returned by
# calculate_projected_points("custom"), wired with "average_stats_by_player_data" as its input
# dataset and "scored_custom_data" as its output dataset.
score_custom_pipeline = Pipeline(
[
node(
calculate_projected_points("custom"),
"average_stats_by_player_data",
"scored_custom_data",
)
]
)
ranking_pipeline = Pipeline(
[
node(calculate_player_rank, "scored_data", "ranked_data", name="overall_rank_node"),
Expand All @@ -77,7 +86,8 @@
),
]
)

# Each of the following pipelines applies the ranking pipeline to one
# scoring type
full_ppr_pipeline = pipeline(
ranking_pipeline,
inputs={"scored_data": "scored_ppr_data"},
Expand All @@ -98,15 +108,22 @@
outputs={"final_score_data": "scoring.standard"},
namespace="std",
)

# Ranking for the custom scoring scheme: wraps ranking_pipeline under the "custom" namespace,
# mapping its "scored_data" input to "scored_custom_data" and its "final_score_data" output to
# "scoring.custom".
full_custom_pipeline = pipeline(
ranking_pipeline,
inputs={"scored_data": "scored_custom_data"},
outputs={"final_score_data": "scoring.custom"},
namespace="custom",
)

# Combined pipeline: one scoring pipeline and one ranking pipeline per scoring scheme
# (ppr, half ppr, standard, custom). Each pipeline is listed exactly once; the custom pipelines
# replace what were duplicated standard entries so the custom scheme actually gets scored and ranked.
final_scoring_ranking_pipeline = (
    score_ppr_pipeline
    + score_half_ppr_pipeline
    + score_std_pipeline
    + score_custom_pipeline
    + full_ppr_pipeline
    + full_half_ppr_pipeline
    + full_standard_pipeline
    + full_custom_pipeline
)


Expand Down

0 comments on commit ce7f75b

Please sign in to comment.