import torch
import numpy as np
# Define the compute_humanoid_reward function
def compute_humanoid_reward(obs_buf, dof_force_tensor, action):
    # type: (Tensor, Tensor, Tensor) -> Tuple[Tensor, Tensor, Tensor]
    # reward = torch.ones_like(obs_buf[:, 0])
    velocity = obs_buf[:, 7]  # vx, the forward velocity component
    target_velocity = 1.4
    # Quadratic penalty whenever the forward velocity falls short of the target
    velocity_penalty = -torch.where(velocity < target_velocity,
                                    (target_velocity - velocity) ** 2,
                                    torch.zeros_like(velocity))
    # DOF indices 14:28 cover the hip, knee, and ankle joints
    # torque_usage = torch.sum(dof_force_tensor[:, 14:28] ** 2, dim=1)
    # v1.2.1 torque-usage penalty (assuming `action` holds the applied torques)
    # torque_reward = -0.1 * torque_usage  # penalize the sum of squared torques
    # v1.2.2 exponential decay
    # torque_reward = 2 * torch.exp(-0.1 * torque_usage)  # exponential decay, coefficient 0.1
    # v1.5.12 proportional penalty once a torque's absolute value exceeds 100
    torque_threshold = 100
    torque_usage = dof_force_tensor[:, 14:28]
    torque_penalty = torch.where(torch.abs(torque_usage) > torque_threshold,
                                 (torch.abs(torque_usage) - torque_threshold) / torque_threshold,
                                 torch.zeros_like(torque_usage))
    torque_reward = -torch.sum(torque_penalty, dim=1)
    # reward = -velocity_penalty + torque_reward
    reward = velocity_penalty + torque_reward
    return reward, velocity_penalty, torque_reward
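# A minimal sanity sketch of the proportional penalty above (assumption: the
# same 100 threshold): a joint at 105 incurs (105 - 100) / 100 = 0.05, while
# one inside the threshold incurs nothing. torch.clamp(min=0) is an equivalent
# formulation to the torch.where branch used inside the function.
_t = torch.tensor([105.0, 95.0])
_p = torch.clamp((_t.abs() - 100.0) / 100.0, min=0.0)
assert torch.allclose(_p, torch.tensor([0.05, 0.0]))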
# Define sample data
obs_buf = torch.tensor([
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],  # observation with forward velocity 1.0
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 0.0]   # forward velocity 1.5
], dtype=torch.float32)
dof_force_tensor = torch.tensor([
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 105.0, 0.0, 0.0, 0.0, 0.0, 110.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # torques above 100 (DOF indices 14 and 19)
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 95.0, 0.0, 0.0, 0.0, 0.0, 85.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]    # torques below 100
], dtype=torch.float32)
action = torch.tensor([
    [0.0] * 28,
    [0.0] * 28
], dtype=torch.float32)
# Call compute_humanoid_reward on the sample batch
reward, velocity_penalty, torque_reward = compute_humanoid_reward(obs_buf, dof_force_tensor, action)
# Print the results
print("Reward:", reward)
print("Velocity Penalty:", velocity_penalty)
print("Torque Reward:", torque_reward)
# Mirror-symmetry permutation for the 8-dim SRL action space: entry i maps
# action i to action int(abs(v)) with sign(v); -0.01 encodes "index 0 with a
# negative sign", since -0 cannot carry a sign.
mirror_idx_act_srl = np.array([-4, 5, 6, 7, -0.01, 1, 2, 3])
mirror_act_srl_mat = torch.zeros((mirror_idx_act_srl.shape[0], mirror_idx_act_srl.shape[0]), dtype=torch.float32)
for i, perm in enumerate(mirror_idx_act_srl):
    mirror_act_srl_mat[i, int(abs(perm))] = np.sign(perm)
print('mirror act srl mat:', mirror_act_srl_mat)
# Applying the mirror twice should give back the identity matrix
I = torch.matmul(mirror_act_srl_mat, mirror_act_srl_mat)
print('m*m:', I)
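# A minimal usage sketch (assuming actions are 8-dim SRL joint commands):
# mirroring an action twice must recover the original, i.e. the mapping is an
# involution and M @ M is the identity. `sample_act` is a hypothetical action.
sample_act = torch.arange(1.0, 9.0)
mirrored = torch.matmul(mirror_act_srl_mat, sample_act)
restored = torch.matmul(mirror_act_srl_mat, mirrored)
assert torch.allclose(restored, sample_act)
assert torch.allclose(I, torch.eye(8))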