test.py (forked from goncamateus/Planning-the-path-with-rl)

import argparse

import gymnasium as gym
import numpy as np
import pandas as pd
import torch

import envs  # importing registers the custom environments with gymnasium
from methods.sac import GaussianPolicy
from utils.experiment import strtobool


def load_model(model_path):
    """Load a saved actor checkpoint (state dict) onto the CPU."""
    model = torch.load(model_path, map_location="cpu")
    return model


def main(env_id, caps):
    env = gym.make(env_id, render_mode="human")

    # Models trained with the CAPS variant are stored under "<env_id>-caps".
    path = env_id + "-caps" if caps else env_id
    state_dict = load_model(f"trained_models/{path}/actor.pt")

    # Rebuild the SAC actor with the same architecture used during training.
    num_inputs = np.array(env.observation_space.shape).prod()
    num_actions = np.array(env.action_space.shape).prod()
    actor = GaussianPolicy(
        num_inputs,
        num_actions,
        log_sig_min=-5,
        log_sig_max=2,
        hidden_dim=256,
        epsilon=1e-6,
        action_space=env.action_space,
    )
    actor.load_state_dict(state_dict)
    actor.eval()
    actor.to("cpu")

    logs = []
    for _ in range(1000):
        obs, _ = env.reset()
        done = False
        trunc = False
        # Roll out one episode with the trained policy.
        while not (done or trunc):
            state = torch.Tensor(obs.reshape(1, -1))
            action = actor.get_action(state)[0]
            obs, reward, done, trunc, info = env.step(action)
        # Per-episode metrics reported by the environment (one step = 0.025 s).
        log = {
            "Episode Length": info["reward_steps"],
            "Episode Length (seconds)": info["reward_steps"] * 0.025,
            "Cumulative Pairwise Action Distance": info["reward_action_var"],
        }
        if "Obstacle" in env_id:
            # A reward of -1000 marks an episode that ended by hitting an obstacle.
            hit = int(reward == -1000)
            log["Hit Obstacle"] = hit
        logs.append(log)
    env.close()

    df = pd.DataFrame(logs)
    df.to_csv(f"results_analysis/{path}.csv")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--gym-id", type=str, default="Baseline-v0")
    parser.add_argument(
        "--caps",
        type=lambda x: bool(strtobool(x)),
        default=False,
        nargs="?",
        const=True,
        help="evaluate the actor saved under 'trained_models/<gym-id>-caps'",
    )
    args = parser.parse_args()
    main(env_id=args.gym_id, caps=args.caps)
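
# Example usage (a sketch, not part of the original file): evaluate the default
# Baseline-v0 environment with and without the "-caps" actor. This assumes the
# matching checkpoints exist under trained_models/ and that a results_analysis/
# directory exists for the output CSV.
#
#   python test.py --gym-id Baseline-v0
#   python test.py --gym-id Baseline-v0 --caps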