prevent gradient explosion

This commit is contained in:
2026-03-22 02:55:07 -04:00
parent 616dd06e78
commit 0315b4cb99
2 changed files with 36 additions and 25 deletions

View File

@@ -26,27 +26,35 @@ def standing_with_feet_reward(
force_threshold: float = 20.0,
max_v_z: float = 0.5
) -> torch.Tensor:
# 增加防护:从场景中安全获取 body 索引
head_idx, _ = env.scene["robot"].find_bodies("H2")
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
curr_head_h = torch.clamp(env.scene["robot"].data.body_state_w[:, head_idx[0], 2], 0.0, 2.0)
curr_pelvis_h = torch.clamp(env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2], 0.0, 2.0)
# 1. 高度奖励:使用更稳定的归一化,限制范围在 [0, 1]
curr_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
head_score = torch.tanh(curr_head_h / (min_head_height + 1e-6) * 2.0)
pelvis_score = torch.tanh(curr_pelvis_h / (min_pelvis_height + 1e-6) * 2.0)
# 使用 sigmoid 或简单的 min-max 映射,避免除以极小值
head_score = torch.clamp(curr_head_h / min_head_height, 0.0, 1.2)
pelvis_score = torch.clamp(curr_pelvis_h / min_pelvis_height, 0.0, 1.2)
height_reward = (head_score + pelvis_score) / 2.0
# 2. 足部受力:增加对 NaN 的防御
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
# 某些步数传感器可能未初始化,加个判空
if contact_sensor is None: return torch.zeros(env.num_envs, device=env.device)
foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
# 对巨大的冲击力做剪裁,防止 sigmoid 输入过大
foot_forces_z = torch.clamp(foot_forces_z, 0.0, 500.0)
force_weight = torch.sigmoid((foot_forces_z - force_threshold) / 5.0)
# 3. 垂直速度惩罚:使用更平滑的惩罚
root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
vel_penalty = torch.exp(-2.0 * torch.clamp(torch.abs(root_vel_z) - max_v_z, min=0.0))
vel_penalty = torch.exp(-torch.abs(root_vel_z) / max_v_z)
influence_weight = torch.clamp((curr_pelvis_h - 0.2) / 0.4, min=0.0, max=1.0)
combined_reward = height_reward * ((1.0 - influence_weight) + influence_weight * force_weight * vel_penalty)
return combined_reward
# 逻辑组合:高度 * 稳定性
return height_reward * (0.5 + 0.5 * force_weight * vel_penalty)
def universal_arm_support_reward(
@@ -138,15 +146,18 @@ def is_standing_still(
# --- 2. 配置类 ---
# Ordered list of joint-name strings (the name suggests these are the T1
# robot's joints — confirm against where the list is consumed).
# NOTE(review): this span looks like a rendered diff with removed and added
# rows interleaved: both arm rows and 'Waist' occur twice. Confirm the real
# contents against the actual file before relying on this list.
T1_JOINT_NAMES = [
# Legs
# NOTE(review): there is no comma after 'Head_pitch', so Python joins it with
# the next string literal into a single element
# ('Head_pitchLeft_Shoulder_Pitch') — TODO confirm this is not intended.
'Head_yaw', 'Head_pitch'
'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
# NOTE(review): no comma after this 'Waist' either; it is joined with
# 'Left_Hip_Pitch' on the next line into 'WaistLeft_Hip_Pitch'.
'Waist'
'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
# Arms
'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
# Waist
'Waist'
]
@@ -180,7 +191,7 @@ class T1EventCfg:
"yaw": (-3.14, 3.14), # 全向旋转
"x": (0.0, 0.0),
"y": (0.0, 0.0),
"z": (0.1, 0.2),
"z": (0.3, 0.4),
},
"velocity_range": {},
},
@@ -244,7 +255,7 @@ class T1GetUpRewardCfg:
# 6. 成功终极大奖
is_success = RewTerm(
func=is_standing_still,
weight=2000.0,
weight=800.0,
params={
"min_head_height": 1.05,
"min_pelvis_height": 0.75,