-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_rewards.py
58 lines (47 loc) · 1.64 KB
/
plot_rewards.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
""" A toy example of playing against pretrianed AI on Leduc Hold'em
"""
from tqdm import tqdm
from mcts import MCTS, TreeSearch, init_game
from mcts_ev import MCTS_Expected
import rlcard
from rlcard import models
from rlcard.agents import CFRAgent
from rlcard.agents import RandomAgent
from rlcard.utils import print_card
import types
import numpy as np
from run import run
def step(self, state):
return self.eval_step(state)[0]
trials = 5
parameter_testing = {}
rollout_nums_mcts = 50
rollout_nums_mcts_ev = 250
env = rlcard.make("leduc-holdem")
env.game.init_game = types.MethodType(init_game, env.game)
cfr_agent = ("CFRAgent", models.load("leduc-holdem-cfr").agents[0])
random_agent = ("RandomAgent", RandomAgent(num_actions=env.num_actions))
mcts_ev_agent = ("MCTS with EV", MCTS_Expected(env, rollout_nums_mcts_ev, 0))
mcts_agent = ("MCTS", MCTS(env, rollout_nums_mcts, 0))
rule1_agent = ("Rule1 Agent", models.load("leduc-holdem-rule-v1").agents[0])
rule2_agent = ("Rule2 Agent", models.load("leduc-holdem-rule-v2").agents[0])
cfr_agent[1].step = types.MethodType(step, cfr_agent[1])
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_theme()
mcts_both_rewards = []
env.set_agents(
[MCTS_Expected(env, rollout_nums_mcts, 0), MCTS(env, rollout_nums_mcts, 1)]
)
for i in tqdm(range(250)):
trajectories, payoffs = env.run(is_training=True)
mcts_both_rewards.append(payoffs[0])
# break
final_reward_both = [
sum(mcts_both_rewards[: i + 1]) for i in range(len(mcts_both_rewards))
]
plt.plot(final_reward_both)
plt.xlabel("Episode")
plt.ylabel("MCTS EV Total Reward")
plt.title("MCTS EV vs MCTS")
plt.savefig("pics/MCTS_both.png")