diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index ca98060..fedad83 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -17,7 +17,7 @@ params: name: default sigma_init: name: const_initializer - val: 1.2 + val: 0.5 fixed_sigma: False mlp: units: [512, 256, 128] @@ -39,21 +39,21 @@ params: reward_shaper: scale_value: 1.0 normalize_advantage: True - gamma: 0.98 + gamma: 0.96 tau: 0.95 - learning_rate: 5e-4 + learning_rate: 3e-4 lr_schedule: adaptive - kl_threshold: 0.008 + kl_threshold: 0.015 score_to_win: 20000 max_epochs: 500 save_best_after: 50 save_frequency: 100 - grad_norm: 0.5 - entropy_coef: 0.008 + grad_norm: 1.0 + entropy_coef: 0.005 truncate_grads: True bounds_loss_coef: 0.001 e_clip: 0.2 - horizon_length: 128 + horizon_length: 65536 minibatch_size: 8192 mini_epochs: 4 critic_coef: 1 diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index 8562d50..ca51114 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -26,27 +26,35 @@ def standing_with_feet_reward( force_threshold: float = 20.0, max_v_z: float = 0.5 ) -> torch.Tensor: - + # 增加防护:从场景中安全获取 body 索引 head_idx, _ = env.scene["robot"].find_bodies("H2") pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk") - curr_head_h = torch.clamp(env.scene["robot"].data.body_state_w[:, head_idx[0], 2], 0.0, 2.0) - curr_pelvis_h = torch.clamp(env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2], 0.0, 2.0) + # 1. 
高度奖励:使用更稳定的归一化,限制范围在 [0, 1] + curr_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2] + curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2] - head_score = torch.tanh(curr_head_h / (min_head_height + 1e-6) * 2.0) - pelvis_score = torch.tanh(curr_pelvis_h / (min_pelvis_height + 1e-6) * 2.0) + # 使用 sigmoid 或简单的 min-max 映射,避免除以极小值 + head_score = torch.clamp(curr_head_h / min_head_height, 0.0, 1.2) + pelvis_score = torch.clamp(curr_pelvis_h / min_pelvis_height, 0.0, 1.2) height_reward = (head_score + pelvis_score) / 2.0 + # 2. 足部受力:增加对 NaN 的防御 contact_sensor = env.scene.sensors.get(sensor_cfg.name) + # 某些步数传感器可能未初始化,加个判空 + if contact_sensor is None: return torch.zeros(env.num_envs, device=env.device) + foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1) + # 对巨大的冲击力做剪裁,防止 sigmoid 输入过大 + foot_forces_z = torch.clamp(foot_forces_z, 0.0, 500.0) force_weight = torch.sigmoid((foot_forces_z - force_threshold) / 5.0) + + # 3. 垂直速度惩罚:使用更平滑的惩罚 root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2] - vel_penalty = torch.exp(-2.0 * torch.clamp(torch.abs(root_vel_z) - max_v_z, min=0.0)) + vel_penalty = torch.exp(-torch.abs(root_vel_z) / max_v_z) - influence_weight = torch.clamp((curr_pelvis_h - 0.2) / 0.4, min=0.0, max=1.0) - combined_reward = height_reward * ((1.0 - influence_weight) + influence_weight * force_weight * vel_penalty) - - return combined_reward + # 逻辑组合:高度 * 稳定性 + return height_reward * (0.5 + 0.5 * force_weight * vel_penalty) def universal_arm_support_reward( @@ -138,15 +146,18 @@ def is_standing_still( # --- 2. 
配置类 --- T1_JOINT_NAMES = [ - # 腿部 + + 'Head_yaw', 'Head_pitch', + + 'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw', + 'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw', + + 'Waist', + 'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll', 'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch', 'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll', - # 手臂 - 'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw', - 'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw', - # 腰部 - 'Waist' + ] @@ -180,7 +191,7 @@ class T1EventCfg: "yaw": (-3.14, 3.14), # 全向旋转 "x": (0.0, 0.0), "y": (0.0, 0.0), - "z": (0.1, 0.2), + "z": (0.3, 0.4), }, "velocity_range": {}, }, @@ -244,7 +255,7 @@ class T1GetUpRewardCfg: # 6. 成功终极大奖 is_success = RewTerm( func=is_standing_still, - weight=2000.0, + weight=800.0, params={ "min_head_height": 1.05, "min_pelvis_height": 0.75,