prevent gradient explosion
This commit is contained in:
@@ -17,7 +17,7 @@ params:
|
|||||||
name: default
|
name: default
|
||||||
sigma_init:
|
sigma_init:
|
||||||
name: const_initializer
|
name: const_initializer
|
||||||
val: 1.2
|
val: 0.5
|
||||||
fixed_sigma: False
|
fixed_sigma: False
|
||||||
mlp:
|
mlp:
|
||||||
units: [512, 256, 128]
|
units: [512, 256, 128]
|
||||||
@@ -39,21 +39,21 @@ params:
|
|||||||
reward_shaper:
|
reward_shaper:
|
||||||
scale_value: 1.0
|
scale_value: 1.0
|
||||||
normalize_advantage: True
|
normalize_advantage: True
|
||||||
gamma: 0.98
|
gamma: 0.96
|
||||||
tau: 0.95
|
tau: 0.95
|
||||||
learning_rate: 5e-4
|
learning_rate: 3e-4
|
||||||
lr_schedule: adaptive
|
lr_schedule: adaptive
|
||||||
kl_threshold: 0.008
|
kl_threshold: 0.015
|
||||||
score_to_win: 20000
|
score_to_win: 20000
|
||||||
max_epochs: 500
|
max_epochs: 500
|
||||||
save_best_after: 50
|
save_best_after: 50
|
||||||
save_frequency: 100
|
save_frequency: 100
|
||||||
grad_norm: 0.5
|
grad_norm: 1.0
|
||||||
entropy_coef: 0.008
|
entropy_coef: 0.005
|
||||||
truncate_grads: True
|
truncate_grads: True
|
||||||
bounds_loss_coef: 0.001
|
bounds_loss_coef: 0.001
|
||||||
e_clip: 0.2
|
e_clip: 0.2
|
||||||
horizon_length: 128
|
horizon_length: 65536
|
||||||
minibatch_size: 8192
|
minibatch_size: 8192
|
||||||
mini_epochs: 4
|
mini_epochs: 4
|
||||||
critic_coef: 1
|
critic_coef: 1
|
||||||
|
|||||||
@@ -26,27 +26,35 @@ def standing_with_feet_reward(
|
|||||||
force_threshold: float = 20.0,
|
force_threshold: float = 20.0,
|
||||||
max_v_z: float = 0.5
|
max_v_z: float = 0.5
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
|
# 增加防护:从场景中安全获取 body 索引
|
||||||
head_idx, _ = env.scene["robot"].find_bodies("H2")
|
head_idx, _ = env.scene["robot"].find_bodies("H2")
|
||||||
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
|
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
|
||||||
|
|
||||||
curr_head_h = torch.clamp(env.scene["robot"].data.body_state_w[:, head_idx[0], 2], 0.0, 2.0)
|
# 1. 高度奖励:使用更稳定的归一化,限制范围在 [0, 1]
|
||||||
curr_pelvis_h = torch.clamp(env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2], 0.0, 2.0)
|
curr_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
|
||||||
|
curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
|
||||||
|
|
||||||
head_score = torch.tanh(curr_head_h / (min_head_height + 1e-6) * 2.0)
|
# 使用 sigmoid 或简单的 min-max 映射,避免除以极小值
|
||||||
pelvis_score = torch.tanh(curr_pelvis_h / (min_pelvis_height + 1e-6) * 2.0)
|
head_score = torch.clamp(curr_head_h / min_head_height, 0.0, 1.2)
|
||||||
|
pelvis_score = torch.clamp(curr_pelvis_h / min_pelvis_height, 0.0, 1.2)
|
||||||
height_reward = (head_score + pelvis_score) / 2.0
|
height_reward = (head_score + pelvis_score) / 2.0
|
||||||
|
|
||||||
|
# 2. 足部受力:增加对 NaN 的防御
|
||||||
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
|
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
|
||||||
|
# 某些步数传感器可能未初始化,加个判空
|
||||||
|
if contact_sensor is None: return torch.zeros(env.num_envs, device=env.device)
|
||||||
|
|
||||||
foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
|
foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
|
||||||
|
# 对巨大的冲击力做剪裁,防止 sigmoid 输入过大
|
||||||
|
foot_forces_z = torch.clamp(foot_forces_z, 0.0, 500.0)
|
||||||
force_weight = torch.sigmoid((foot_forces_z - force_threshold) / 5.0)
|
force_weight = torch.sigmoid((foot_forces_z - force_threshold) / 5.0)
|
||||||
|
|
||||||
|
# 3. 垂直速度惩罚:使用更平滑的惩罚
|
||||||
root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
|
root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
|
||||||
vel_penalty = torch.exp(-2.0 * torch.clamp(torch.abs(root_vel_z) - max_v_z, min=0.0))
|
vel_penalty = torch.exp(-torch.abs(root_vel_z) / max_v_z)
|
||||||
|
|
||||||
influence_weight = torch.clamp((curr_pelvis_h - 0.2) / 0.4, min=0.0, max=1.0)
|
# 逻辑组合:高度 * 稳定性
|
||||||
combined_reward = height_reward * ((1.0 - influence_weight) + influence_weight * force_weight * vel_penalty)
|
return height_reward * (0.5 + 0.5 * force_weight * vel_penalty)
|
||||||
|
|
||||||
return combined_reward
|
|
||||||
|
|
||||||
|
|
||||||
def universal_arm_support_reward(
|
def universal_arm_support_reward(
|
||||||
@@ -138,15 +146,18 @@ def is_standing_still(
|
|||||||
# --- 2. 配置类 ---
|
# --- 2. 配置类 ---
|
||||||
|
|
||||||
T1_JOINT_NAMES = [
|
T1_JOINT_NAMES = [
|
||||||
# 腿部
|
|
||||||
|
'Head_yaw', 'Head_pitch',
|
||||||
|
|
||||||
|
'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
|
||||||
|
'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
|
||||||
|
|
||||||
|
'Waist',
|
||||||
|
|
||||||
'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
|
'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
|
||||||
'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
|
'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
|
||||||
'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
|
'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
|
||||||
# 手臂
|
|
||||||
'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
|
|
||||||
'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
|
|
||||||
# 腰部
|
|
||||||
'Waist'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -180,7 +191,7 @@ class T1EventCfg:
|
|||||||
"yaw": (-3.14, 3.14), # 全向旋转
|
"yaw": (-3.14, 3.14), # 全向旋转
|
||||||
"x": (0.0, 0.0),
|
"x": (0.0, 0.0),
|
||||||
"y": (0.0, 0.0),
|
"y": (0.0, 0.0),
|
||||||
"z": (0.1, 0.2),
|
"z": (0.3, 0.4),
|
||||||
},
|
},
|
||||||
"velocity_range": {},
|
"velocity_range": {},
|
||||||
},
|
},
|
||||||
@@ -244,7 +255,7 @@ class T1GetUpRewardCfg:
|
|||||||
# 6. 成功终极大奖
|
# 6. 成功终极大奖
|
||||||
is_success = RewTerm(
|
is_success = RewTerm(
|
||||||
func=is_standing_still,
|
func=is_standing_still,
|
||||||
weight=2000.0,
|
weight=800.0,
|
||||||
params={
|
params={
|
||||||
"min_head_height": 1.05,
|
"min_head_height": 1.05,
|
||||||
"min_pelvis_height": 0.75,
|
"min_pelvis_height": 0.75,
|
||||||
|
|||||||
Reference in New Issue
Block a user