-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcounterfactual_conferences_2023.py
297 lines (252 loc) · 11 KB
/
counterfactual_conferences_2023.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import os, re
import numpy as np
import pandas as pd
def resort_columns(dataframe: pd.DataFrame):
def sort_key(item):
""" Function to sort tuples by the second item in the tuple """
if isinstance(item, str):
try:
return float(item.split(",")[1].strip(") '"))
except:
return float("inf")
return float("inf")
# Sort the second row by the second items of the tuples in ascending order
dataframe.iloc[1] = dataframe.iloc[1].sort_values(key=lambda col: col.map(sort_key))
return dataframe
def clean_dataframe(df):
""" Clean the DataFrame by stripping whitespace and replacing empty strings with NaN """
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
df.replace("", pd.NA, inplace=True)
return df
def find_conference_column(df, conference):
""" Returns the index of the column for the target conference. """
for col in df.columns:
if conference in str(col):
return col
return None
def realign_teams(df: pd.DataFrame, n_teams_score: int = 5):
""" Function to realign teams """
# Ensure that the dataframe doesn't have any superfluous `'` characters in it.
def clean_quotes(s):
""" Function to clean single quotes from a string """
if isinstance(s, str):
return s.replace("'", "")
return s
# Apply the clean_quotes function to all elements in the DataFrame
df = df.applymap(clean_quotes)
# Establish the existing conferences & their scores in a dictionary.
def parse_tuple_string(s):
""" Function to parse tuples from strings """
match = re.match(r"\(([^,]+),\s*([^)]+)\)", s)
if match:
return (match.group(1).strip(), match.group(2).strip())
return None
# Create the dictionary
existing_conference_dict = {}
# Use the first row as keys
keys = [parse_tuple_string(x) for x in df.iloc[0].dropna()]
# Initialize empty lists for each key
for key in keys:
existing_conference_dict[key] = []
# Iterate over the rest of the DataFrame to populate the dictionary
for row in df.iloc[1:].itertuples(index=False):
for i, value in enumerate(row):
if pd.notna(value):
parsed_value = parse_tuple_string(value)
if parsed_value:
existing_conference_dict[keys[i]].append(parsed_value)
# Print the result dictionary
for key, values in existing_conference_dict.items():
print(f"{key}: {values}")
# Working dict for use downstream
working_conf_dict = existing_conference_dict.copy()
# Define the team movements
team_movements = {
"SEC": ["Oklahoma", "Texas"],
"Big Ten": ["Washington", "Oregon", "UCLA", "Southern Cal", "USC"],
"Big 12": [
"Arizona",
"Arizona St.",
"Utah",
"Colorado",
"BYU",
"Cincinnati",
"Houston",
"UCF",
],
"ACC": ["California", "Stanford", "SMU"],
"American": [
"Charlotte",
"Florida Atlantic",
"North Texas",
"Rice",
"UAB",
"UTSA",
],
}
# Create a map of old to new conferences
new_conference_map = {}
for new_conference, teams in team_movements.items():
for team in teams:
new_conference_map[team] = new_conference
def find_team_conference(result_dict: dict, team_to_find: str) -> tuple:
"""Function to find the team and print its details"""
for conference, teamsss in result_dict.items():
for teammm in teamsss:
if teammm[0] == team_to_find:
print(f"Team: {team_to_find}")
print(f"Previous Conference: {conference[0]}")
print(f"Score: {teammm[1]}")
return (teammm, conference)
def move_team_to_new_conf(work_conf_d, team_to_move, move_to_conf):
team_tuple = None
old_conference_tuple = None
# Find the team and its current conference
for conference, teams in work_conf_d.items():
for team in teams:
if team[0] == team_to_move:
team_tuple = team
old_conference_tuple = conference
break
if team_tuple:
break
# Remove the team from its old conference
if old_conference_tuple and team_tuple:
work_conf_d[old_conference_tuple].remove(team_tuple)
print(f"Removed {team_to_move} from {old_conference_tuple}")
# Find the new conference key
new_conference_tuple = None
for conference in work_conf_d.keys():
if conference[0] == move_to_conf:
new_conference_tuple = conference
break
# Add the team to the new conference
if new_conference_tuple:
work_conf_d[new_conference_tuple].append(team_tuple)
print(f"Added {team_to_move} to {new_conference_tuple}")
return work_conf_d
problem_teams = ("Washington", "Texas")
# Iterate over each cell and update the conference
for move_to_conf, realigned_teams_list in team_movements.items():
if realigned_teams_list and (
isinstance(realigned_teams_list, list)
or isinstance(realigned_teams_list, tuple)
):
# If the list exists:
for team_being_realigned in realigned_teams_list:
# Find the team in question in its existing spot in the standings with its original, old conference.
# # Format: Team_tuple, conf_tuple
exist_spot_tuple = find_team_conference(
existing_conference_dict, team_being_realigned
)
# if isinstance(cell_value, str):
# print(exist_spot_tuple)
if exist_spot_tuple:
# The team being realigned received votes in the polls. Realign them in this model.
team_teamscore_tuple = exist_spot_tuple[0]
team_name = team_teamscore_tuple[
0
] # Should duplicate team_being_realigned
old_conference_tuple = exist_spot_tuple[1]
old_conference_name = old_conference_tuple[0]
# print(team_name)
pyes = False
if team_name in problem_teams:
pyes = True
print(team_name)
print(move_to_conf)
# Now move the teams to their new conferences.
working_conf_dict = move_team_to_new_conf(
work_conf_d=working_conf_dict,
team_to_move=team_being_realigned,
move_to_conf=move_to_conf,
)
print("- - - - - - - - - - - - - - - -")
# Now re-sort the teams and re-calculate team scores
print(working_conf_dict, "\n")
def sort_conference_tuples(working_conf_dict):
""" Function to sort the tuples in each conference"""
sorted_dict = {}
for conference, teams in working_conf_dict.items():
# Sort the list of teams by the second item in the tuple (score), then by the first item (team name) if scores are equal
sorted_teams = sorted(
teams,
key=lambda x: (float(x[1]), x[0])
if x[1].replace(".", "", 1).isdigit()
else (float("inf"), x[0]),
)
sorted_dict[conference] = sorted_teams
return sorted_dict
# Sort the conference tuples
sorted_conf_dict = sort_conference_tuples(working_conf_dict)
# Remove conferences with no teams
sorted_conf_dict = {
conf: teams for conf, teams in sorted_conf_dict.items() if teams
}
# Re-Score the conferences
def rescore_conferences(sort_conf_dict, X=5):
"""Function to score each conference based on the sum of the lowest X scores"""
scored_dict = {}
for conference, teams in sort_conf_dict.items():
# Calculate the sum of the lowest X scores or set to 'DNS' if not enough teams
if len(teams) >= X:
lowest_scores_sum = sum(float(team[1]) for team in teams[:X])
new_conference_tuple = (conference[0], str(lowest_scores_sum))
else:
new_conference_tuple = (conference[0], "DNS")
scored_dict[new_conference_tuple] = teams
return scored_dict
n_teams_score = int(n_teams_score)
sorted_conf_dict = rescore_conferences(sorted_conf_dict, n_teams_score)
# Print the sorted dictionary
for key, values in sorted_conf_dict.items():
print(f"{key}: {values}")
print("\n")
# Now convert this dict back into a DF
# Create a list of columns for the DataFrame
columns = [conf for conf in sorted_conf_dict.keys()]
# Find the maximum number of teams in any conference
max_teams = max(len(teams) for teams in sorted_conf_dict.values())
# Create an empty DataFrame with the number of columns as there are conferences
df = pd.DataFrame(index=range(max_teams + 1), columns=range(len(columns)))
# Set the first row with the conference information
for i, conf in enumerate(columns):
df.iat[0, i] = conf
# Populate the DataFrame with the team tuples
for col_index, conf in enumerate(columns):
teams = sorted_conf_dict[conf]
for row_index, team in enumerate(
teams, start=1
): # Start at row 1 to skip the header
df.iat[row_index, col_index] = team
print(df)
return df
if __name__ == "__main__":
# Load the CSV files
path_to_your_file = os.path.abspath(os.path.curdir)
df_4team = pd.read_csv(
os.path.join(path_to_your_file, "data/2023/4_team/2023_week_final.csv"),
header=None,
)
df_5team = pd.read_csv(
os.path.join(path_to_your_file, "data/2023/5_team/2023_week_final.csv"),
header=None,
)
# Realign teams in both DataFrames
df_4team_realigned = realign_teams(df_4team, n_teams_score=4)
df_5team_realigned = realign_teams(df_5team, n_teams_score=5)
# Save the updated DataFrames to CSV
df_4team_realigned.to_csv(
os.path.join(
path_to_your_file, "data/2023/4_team/2023_week_final_4team_realigned.csv"
),
index=False,
header=False,
)
df_5team_realigned.to_csv(
os.path.join(
path_to_your_file, "data/2023/5_team/2023_week_final_5team_realigned.csv"
),
index=False,
header=False,
)