ai_player.py
import math

import numpy as np
import torch

import config as cfg
from nn import (
    DQN,
    compute_loss,
    reward,
    preprocessing,
    softmax_action,
    epsilon_greedy_action,
)
from subfunctions import get_random_gap, get_random_position


def init_ai_player(id, model, iteration):
    """Create the state dict for an AI-controlled player.

    The player starts at a random position and direction, gets a fixed color
    based on its id, its own SGD optimizer, and fresh training buffers.
    (The `iteration` argument is not used inside this function.)
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=cfg.learning_rate)
    player_colors = [
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
        (0, 255, 255),
    ]
    color = player_colors[id]
    start_pos, start_dir = get_random_position()
    start_gap, start_line = get_random_gap()
    # Pixel coordinates the player occupies in the game-state grid,
    # centered on the rounded start position.
    game_state_pos = np.array(
        [
            np.arange(
                int(round(start_pos[0], 0)) - int(cfg.player_size / 2),
                int(round(start_pos[0], 0)) + int(cfg.player_size / 2),
            ),
            np.arange(
                int(round(start_pos[1], 0)) - int(cfg.player_size / 2),
                int(round(start_pos[1], 0)) + int(cfg.player_size / 2),
            ),
        ]
    )
    gap = False
    player = {
        "pos": start_pos,
        "game_state_pos": game_state_pos,
        "dir": start_dir,
        "angle": 0,
        "color": color,
        "alive": True,
        "length": 1,
        "speed": cfg.speed,
        "id": id,
        "pos_history": [start_pos],
        "size": cfg.player_size,
        "gap": gap,
        "gap_history": [gap],
        "gap_timer": start_gap,
        "line_timer": start_line,
        "del_angle": 5,  # degrees turned per update when steering
        "items": [],
        "item_timer": [],
        "left": -1,
        "right": 1,
        "ai": True,
        "model": model,
        "outcomes": torch.tensor([0]),
        "optimizer": optimizer,
        "pred_actions": torch.tensor(data=[]),
        "actions": torch.tensor(data=[], dtype=torch.int64),
    }
    return player
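

# Example construction of AI players (an illustrative sketch only, not part of
# the game loop): the DQN constructor arguments are defined in nn.py and are
# assumed here to be defaultable.
#
#     shared_model = DQN()
#     ai_players = [init_ai_player(i, shared_model, iteration=0) for i in range(2)]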


def update_ai_player_direction(player, game_state, players):
    """Choose a steering action with the player's DQN and update the heading.

    In training mode, the greedy action is replaced by an exploratory one, the
    Q-values, action and reward are appended to the player's buffers, and one
    optimizer step is taken on the accumulated loss.
    """
    # Preprocess the player's view of the game state for the network.
    prepro = preprocessing(player, game_state)
    section, densities = prepro.get_game_variables()
    # Get the model output and the greedy action.
    model = player["model"]
    q_values = model.forward(section, densities)
    action = torch.argmax(q_values)
    if cfg.training:
        # Replace the greedy action with an exploratory one (epsilon-greedy
        # here; softmax exploration is also available), so the action executed
        # below matches the action stored in the training buffer.
        action = epsilon_greedy_action(q_values)
        # Append the Q-values and the chosen action to the training buffers.
        player["pred_actions"] = torch.cat(
            [player["pred_actions"], q_values.unsqueeze(0)], dim=0
        )
        player["actions"] = torch.cat(
            [player["actions"], action.clone().unsqueeze(0)],
            dim=0,
        )
        # Update the model: compute the reward, accumulate it, and take one
        # optimizer step on the loss over the outcomes collected so far.
        optimizer = player["optimizer"]
        optimizer.zero_grad()
        outcome = reward(player, players)
        player["outcomes"] = torch.cat(
            [player["outcomes"], outcome.unsqueeze(0)], dim=0
        )
        loss = compute_loss(
            player["outcomes"],
            player["pred_actions"],
            player["actions"],
        )
        # Backpropagation
        loss.backward(retain_graph=True)
        optimizer.step()
        # Store the updated model and optimizer back on the player.
        player["model"] = model
        player["optimizer"] = optimizer
    # Change direction depending on the chosen action: action 0 decreases the
    # angle, action 1 increases it, any other action keeps the current heading.
    if action == 0:
        player["angle"] -= player["del_angle"]
    if action == 1:
        player["angle"] += player["del_angle"]
    # Normalize the angle to keep it within the 0-360 degree range.
    if player["angle"] < 0:
        player["angle"] += 360
    elif player["angle"] >= 360:
        player["angle"] -= 360
    # Convert the angle to radians and update the direction vector.
    rad_angle = math.radians(player["angle"])
    player["dir"] = [math.cos(rad_angle), math.sin(rad_angle)]
    return player
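

# Illustrative per-frame usage (a sketch, not part of this module's API): the
# surrounding game loop, the construction of `game_state`, and the movement /
# collision handling live in other modules of this repository and are assumed
# here, so the snippet is kept as comments rather than executable code.
#
#     for ai_player in ai_players:
#         if ai_player["alive"]:
#             ai_player = update_ai_player_direction(ai_player, game_state, ai_players)
#             # the game loop would then move the player along ai_player["dir"]
#             # at ai_player["speed"] and update pos / pos_history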