Skip to content

Commit

Permalink
remove a bunch of warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
guidopetri committed Sep 1, 2024
1 parent 5ea22b8 commit 03a8cfb
Show file tree
Hide file tree
Showing 11 changed files with 303 additions and 164 deletions.
277 changes: 227 additions & 50 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ readme = "README.md"
python = "^3.12"
psycopg2 = "^2.6.2"
luigi = "^3.3.0"
- pandas = "^2.0.3"
+ pandas = "^2.2.2"
chess = "^1.10.0"
python-lichess = "^0.10"
sendgrid = "6.0.5"
stockfish = "3.5.0"
- sqlalchemy = "1.3.0"
- seaborn = "^0.10.0"
- matplotlib = "3.8.4"
+ sqlalchemy = "^2.0.32"
+ seaborn = "^0.13.0"
beautifulsoup4 = "^4.8.0"
scikit-learn = "^1.0.1"
+ more-itertools = "^10.4.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.1"
Expand Down
4 changes: 2 additions & 2 deletions src/pipeline_import/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ def create_wp_features(df: pd.DataFrame) -> pd.DataFrame:
df = df[df['clock'] != -1]

df['opponent_clock'] = df.groupby(['game_link'])['clock'].shift(-1)
- df['opponent_clock'].fillna(df['opponent_clock'].shift(2), inplace=True)
+ df['opponent_clock'] = df['opponent_clock'].fillna(df['opponent_clock'].shift(2)) # noqa

# in situations where there were only one or two moves,
# fill with the clock time
- df['opponent_clock'].fillna(df['clock'], inplace=True)
+ df['opponent_clock'] = df['opponent_clock'].fillna(df['clock'])

# start with white
df['player_to_move'] = df['half_move'] % 2
Expand Down
7 changes: 4 additions & 3 deletions src/pipeline_import/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os

from matplotlib import use
+ from more_itertools import one
from seaborn import set as sns_set


Expand Down Expand Up @@ -78,9 +79,9 @@ def make_elo_by_weekday_plot(elo, fig_loc, filename):
xticks=range(0, 7),
)

- min_last_day = elo[-1:]['min'].values
- max_last_day = elo[-1:]['max'].values
- mean_last_day = elo[-1:]['mean'].values
+ min_last_day = one(elo[-1:]['min'].values)
+ max_last_day = one(elo[-1:]['max'].values)
+ mean_last_day = one(elo[-1:]['mean'].values)

# annotate the lines individually
ax.annotate('min',
Expand Down
31 changes: 14 additions & 17 deletions src/pipeline_import/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
to_numeric,
to_timedelta,
)
- from psycopg2 import connect
from utils.types import Json, Visitor


Expand Down Expand Up @@ -107,7 +106,7 @@ def fix_provisional_columns(json_df: pd.DataFrame) -> pd.DataFrame:
for side in ['black', 'white']:
col = f'players_{side}_provisional'
if col in json_df.columns:
- json_df[col].fillna(False, inplace=True)
+ json_df[col] = json_df[col].fillna(False)
else:
json_df[col] = False
return json_df
Expand Down Expand Up @@ -288,7 +287,7 @@ def get_elo_by_weekday(df, category='blitz'):
df['weekday_played'] = df['datetime_played'].dt.weekday

# change to sunday-first, not monday-first
- df['weekday_played'].replace(6, -1, inplace=True)
+ df['weekday_played'] = df['weekday_played'].replace(6, -1)
df['weekday_played'] += 1 # what a dumb way of fixing this

elo = (df.groupby('weekday_played')
Expand All @@ -307,18 +306,16 @@ def get_elo_by_weekday(df, category='blitz'):


def get_weekly_data(pg_cfg, player):
- db_connection_string = 'postgresql://{}:{}@{}:{}/{}'
-
- with connect(db_connection_string.format(pg_cfg.read_user,
- pg_cfg.read_password,
- pg_cfg.host,
- pg_cfg.port,
- pg_cfg.database)) as con:
-
- sql = f"""SELECT * from chess_games
- WHERE player = '{player}'
- AND datetime_played >= now()::date - interval '7 days';
- """
-
- df = read_sql_query(sql, con)
+ db_conn_string = 'postgresql+psycopg2://{}:{}@{}:{}/{}'
+ db_conn_string = db_conn_string.format(pg_cfg.read_user,
+ pg_cfg.read_password,
+ pg_cfg.host,
+ pg_cfg.port,
+ pg_cfg.database)
+ sql = f"""SELECT * from chess_games
+ WHERE player = '{player}'
+ AND datetime_played >= now()::date - interval '7 days';
+ """
+
+ df = read_sql_query(sql, db_conn_string)
return df
24 changes: 9 additions & 15 deletions src/utils/db.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@

import pandas as pd
- import psycopg2
from pipeline_import.configs import postgres_cfg


def run_remote_sql_query(sql, **params) -> pd.DataFrame:
pg_cfg = postgres_cfg()
- user = pg_cfg.user
- password = pg_cfg.password
- host = pg_cfg.host
- port = pg_cfg.port
- database = pg_cfg.database
-
- db = psycopg2.connect(host=host,
- database=database,
- user=user,
- password=password,
- port=port,
- )
-
- df: pd.DataFrame = pd.read_sql_query(sql, db, params=params)
+
+ db_conn_string = 'postgresql+psycopg2://{}:{}@{}:{}/{}'
+ db_conn_string = db_conn_string.format(pg_cfg.user,
+ pg_cfg.password,
+ pg_cfg.host,
+ pg_cfg.port,
+ pg_cfg.database)
+
+ df: pd.DataFrame = pd.read_sql_query(sql, db_conn_string, params=params)

return df

Expand Down
4 changes: 3 additions & 1 deletion src/vendors/stockfish.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def get_evals(df: pd.DataFrame,
errors='coerce')

df.dropna(inplace=True)
- df = pd.concat([df, db_evaluations], axis=0, ignore_index=True)
+
+ if not db_evaluations.empty:
+ df = pd.concat([df, db_evaluations], axis=0, ignore_index=True)

return df
9 changes: 9 additions & 0 deletions tests/__snapshots__/test_plots.ambr
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# serializer version: 1
+ # name: test_color_stats_plot
+ '4a9c086a905bde320cfeed437e131934'
+ # ---
# name: test_create_newsletter
Mail(
asm=None,
Expand Down Expand Up @@ -81,6 +84,12 @@
tracking_settings=None,
)
# ---
+ # name: test_elo_by_weekday_plot
+ '574de65d835cd72b0c470efd78483315'
+ # ---
+ # name: test_elo_by_weekday_without_games
+ '2c72ebcedd63e778d31e3ca25711e9d3'
+ # ---
# name: test_generate_elo_by_weekday_text_generic
"This week, your highest elo in bullet was 300 and your lowest elo was 100. <br><img alt='Elo by weekday' src='cid:elo-by-weekday'><br>"
# ---
Expand Down
20 changes: 7 additions & 13 deletions tests/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)


- def test_color_stats_plot():
+ def test_color_stats_plot(snapshot):

multiindex = pd.MultiIndex.from_arrays([['blitz', 'blitz', 'bullet'],
['white', 'black', 'black']],
Expand All @@ -41,14 +41,12 @@ def test_color_stats_plot():
with open(file_loc, 'rb') as f:
md5 = hashlib.md5(f.read()).hexdigest()

- true_md5 = 'c5db29597bc7936db111444dd8bc35df'
-
- assert md5 == true_md5
+ assert md5 == snapshot

os.remove(file_loc)


- def test_elo_by_weekday_plot():
+ def test_elo_by_weekday_plot(snapshot):
elo = pd.DataFrame([[0, 1666, 0, 1666.0, 1666.0],
[1, 1685, 7.071, 1680.0, 1690.0],
[2, 1685, 7.071, 1680.0, 1690.0],
Expand All @@ -73,14 +71,12 @@ def test_elo_by_weekday_plot():
with open(file_loc, 'rb') as f:
md5 = hashlib.md5(f.read()).hexdigest()

- true_md5 = 'dad710a3d32903926277ea59a4d1e2cc'
-
- assert md5 == true_md5
+ assert md5 == snapshot

os.remove(file_loc)


- def test_elo_by_weekday_without_games():
+ def test_elo_by_weekday_without_games(snapshot):

empty_elo = pd.DataFrame([],
columns=['weekday_played',
Expand All @@ -94,7 +90,7 @@ def test_elo_by_weekday_without_games():
)

fig_loc = '.'
- filename = 'elo_by_weekday_test.png'
+ filename = 'elo_by_weekday_without_games.png'

plots.make_elo_by_weekday_plot(empty_elo, fig_loc, filename)

Expand All @@ -103,9 +99,7 @@ def test_elo_by_weekday_without_games():
with open(file_loc, 'rb') as f:
md5 = hashlib.md5(f.read()).hexdigest()

- true_md5 = '4e93a5e395a23f57b6700aba8d630796'
-
- assert md5 == true_md5
+ assert md5 == snapshot

os.remove(file_loc)

Expand Down
Loading

0 comments on commit 03a8cfb

Please sign in to comment.