CEM.py
import numpy as np

from CannonEnv import CannonEnv  # the environment class, assumed to live in CannonEnv.py
def train_cem(env, n_iterations=100, batch_size=50, elite_frac=0.2, initial_std=10.0):
    """
    Train an agent using the cross-entropy method.

    Parameters:
    - env: The environment to train on.
    - n_iterations: Number of training iterations.
    - batch_size: Number of samples per iteration.
    - elite_frac: Fraction of samples to keep as the elite set.
    - initial_std: Initial standard deviation of the action distribution.

    Returns:
    - mean: The mean of the final action distribution (the learned action).
    """
    n_elite = int(batch_size * elite_frac)

    # Initialize the mean and standard deviation of the action distribution
    mean = np.zeros(env.action_space.shape)
    std = np.full(env.action_space.shape, initial_std)

    for iteration in range(n_iterations):
        # Sample a batch of candidate actions from the current Gaussian
        actions = np.random.normal(mean, std, size=(batch_size, env.action_space.shape[0]))

        # Evaluate each candidate action in the environment
        rewards = np.array([evaluate_action(env, action) for action in actions])

        # Keep the top n_elite actions by reward
        elite_idxs = rewards.argsort()[-n_elite:]
        elite_actions = actions[elite_idxs]

        # Refit the Gaussian to the elite set
        mean = elite_actions.mean(axis=0)
        std = elite_actions.std(axis=0)

        print(f"Iteration {iteration + 1}/{n_iterations}: mean reward = {rewards.mean()}")

    return mean
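
# A common refinement (a sketch, not part of the original script): refitting std
# directly to the elite set can collapse exploration to zero after a few
# iterations. One hedge is to lower-bound the std update with a noise floor;
# `noise_floor` is a hypothetical parameter introduced here for illustration.
def cem_std_update(elite_actions, noise_floor=0.1):
    """Return the elite std, bounded below so sampling keeps exploring."""
    return np.maximum(elite_actions.std(axis=0), noise_floor)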
def evaluate_action(env, action):
    """
    Evaluate a single action in the environment.

    Parameters:
    - env: The environment to evaluate the action in.
    - action: The action to evaluate.

    Returns:
    - reward: The reward obtained from the action.
    """
    env.reset()
    _, reward, terminated, truncated, _ = env.step(action)
    return reward
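
# Note: evaluate_action assumes each episode ends after a single step (one
# cannon shot per reset). For environments with longer episodes, a rollout
# along these lines would accumulate reward until termination (a sketch under
# that assumption; `max_steps` is a hypothetical cap, not from the original):
def evaluate_action_rollout(env, action, max_steps=200):
    """Sum rewards over one episode, applying the same action at every step."""
    env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        _, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        if terminated or truncated:
            break
    return total_reward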
if __name__ == "__main__":
    env = CannonEnv()  # Initialize the environment
    optimal_action = train_cem(env)
    print(f"Optimal initial speed found: {optimal_action}")