prevent gradient explosion
This commit is contained in:
@@ -17,7 +17,7 @@ params:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 1.2
|
||||
val: 0.5
|
||||
fixed_sigma: False
|
||||
mlp:
|
||||
units: [512, 256, 128]
|
||||
@@ -39,21 +39,21 @@ params:
|
||||
reward_shaper:
|
||||
scale_value: 1.0
|
||||
normalize_advantage: True
|
||||
gamma: 0.98
|
||||
gamma: 0.96
|
||||
tau: 0.95
|
||||
learning_rate: 5e-4
|
||||
learning_rate: 3e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
kl_threshold: 0.015
|
||||
score_to_win: 20000
|
||||
max_epochs: 500
|
||||
save_best_after: 50
|
||||
save_frequency: 100
|
||||
grad_norm: 0.5
|
||||
entropy_coef: 0.008
|
||||
grad_norm: 1.0
|
||||
entropy_coef: 0.005
|
||||
truncate_grads: True
|
||||
bounds_loss_coef: 0.001
|
||||
e_clip: 0.2
|
||||
horizon_length: 128
|
||||
horizon_length: 65536
|
||||
minibatch_size: 8192
|
||||
mini_epochs: 4
|
||||
critic_coef: 1
|
||||
|
||||
@@ -26,27 +26,35 @@ def standing_with_feet_reward(
|
||||
force_threshold: float = 20.0,
|
||||
max_v_z: float = 0.5
|
||||
) -> torch.Tensor:
|
||||
|
||||
# 增加防护:从场景中安全获取 body 索引
|
||||
head_idx, _ = env.scene["robot"].find_bodies("H2")
|
||||
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
|
||||
|
||||
curr_head_h = torch.clamp(env.scene["robot"].data.body_state_w[:, head_idx[0], 2], 0.0, 2.0)
|
||||
curr_pelvis_h = torch.clamp(env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2], 0.0, 2.0)
|
||||
# 1. 高度奖励:使用更稳定的归一化,限制范围在 [0, 1]
|
||||
curr_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
|
||||
curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
|
||||
|
||||
head_score = torch.tanh(curr_head_h / (min_head_height + 1e-6) * 2.0)
|
||||
pelvis_score = torch.tanh(curr_pelvis_h / (min_pelvis_height + 1e-6) * 2.0)
|
||||
# 使用 sigmoid 或简单的 min-max 映射,避免除以极小值
|
||||
head_score = torch.clamp(curr_head_h / min_head_height, 0.0, 1.2)
|
||||
pelvis_score = torch.clamp(curr_pelvis_h / min_pelvis_height, 0.0, 1.2)
|
||||
height_reward = (head_score + pelvis_score) / 2.0
|
||||
|
||||
# 2. 足部受力:增加对 NaN 的防御
|
||||
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
|
||||
# 某些步数传感器可能未初始化,加个判空
|
||||
if contact_sensor is None: return torch.zeros(env.num_envs, device=env.device)
|
||||
|
||||
foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
|
||||
# 对巨大的冲击力做剪裁,防止 sigmoid 输入过大
|
||||
foot_forces_z = torch.clamp(foot_forces_z, 0.0, 500.0)
|
||||
force_weight = torch.sigmoid((foot_forces_z - force_threshold) / 5.0)
|
||||
|
||||
# 3. 垂直速度惩罚:使用更平滑的惩罚
|
||||
root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
|
||||
vel_penalty = torch.exp(-2.0 * torch.clamp(torch.abs(root_vel_z) - max_v_z, min=0.0))
|
||||
vel_penalty = torch.exp(-torch.abs(root_vel_z) / max_v_z)
|
||||
|
||||
influence_weight = torch.clamp((curr_pelvis_h - 0.2) / 0.4, min=0.0, max=1.0)
|
||||
combined_reward = height_reward * ((1.0 - influence_weight) + influence_weight * force_weight * vel_penalty)
|
||||
|
||||
return combined_reward
|
||||
# 逻辑组合:高度 * 稳定性
|
||||
return height_reward * (0.5 + 0.5 * force_weight * vel_penalty)
|
||||
|
||||
|
||||
def universal_arm_support_reward(
|
||||
@@ -138,15 +146,18 @@ def is_standing_still(
|
||||
# --- 2. 配置类 ---
|
||||
|
||||
T1_JOINT_NAMES = [
|
||||
# 腿部
|
||||
|
||||
'Head_yaw', 'Head_pitch',
|
||||
|
||||
'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
|
||||
'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
|
||||
|
||||
'Waist',
|
||||
|
||||
'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
|
||||
'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
|
||||
'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
|
||||
# 手臂
|
||||
'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
|
||||
'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
|
||||
# 腰部
|
||||
'Waist'
|
||||
|
||||
]
|
||||
|
||||
|
||||
@@ -180,7 +191,7 @@ class T1EventCfg:
|
||||
"yaw": (-3.14, 3.14), # 全向旋转
|
||||
"x": (0.0, 0.0),
|
||||
"y": (0.0, 0.0),
|
||||
"z": (0.1, 0.2),
|
||||
"z": (0.3, 0.4),
|
||||
},
|
||||
"velocity_range": {},
|
||||
},
|
||||
@@ -244,7 +255,7 @@ class T1GetUpRewardCfg:
|
||||
# 6. 成功终极大奖
|
||||
is_success = RewTerm(
|
||||
func=is_standing_still,
|
||||
weight=2000.0,
|
||||
weight=800.0,
|
||||
params={
|
||||
"min_head_height": 1.05,
|
||||
"min_pelvis_height": 0.75,
|
||||
|
||||
Reference in New Issue
Block a user