def linear_head_height_reward(
    env: "ManagerBasedRLEnv",
    target_height: float,
    base_height: float = 0.15,
    body_name: str = "H2",
) -> torch.Tensor:
    """Return a linear reward in [0, 1] for moving a body from ``base_height`` to ``target_height``.

    The reward is the fraction of the way the body's height has progressed
    from ``base_height`` (reward 0) to ``target_height`` (reward 1), clamped
    to that interval.

    Args:
        env: Environment whose ``scene["robot"]`` entry provides
            ``find_bodies`` and ``data.body_state_w``.
        target_height: Height at which the reward saturates at 1.
        base_height: Height at (or beyond, away from the target) which the
            reward is 0.
        body_name: Name passed to ``find_bodies`` to locate the tracked body.
            Defaults to ``"H2"``, the head body used by the original code.

    Returns:
        A tensor with one value per entry along the first axis of
        ``body_state_w`` (presumably one per environment -- confirm against
        the simulator's data layout), each in ``[0, 1]``.

    Raises:
        ValueError: If ``target_height`` equals ``base_height``; the
            normalisation would divide by zero and yield inf/NaN rewards.
        IndexError: If ``find_bodies`` returns no match for ``body_name``.
    """
    # A zero-width range would turn the division below into inf/NaN, and
    # clamp() does not remove NaN, so fail loudly on the bad configuration.
    if target_height == base_height:
        raise ValueError(
            "target_height must differ from base_height "
            f"(both are {target_height})"
        )

    robot = env.scene["robot"]

    # NOTE(review): the name lookup runs on every call; if it shows up in
    # profiles, resolve the body index once outside the reward function.
    body_ids, _ = robot.find_bodies(body_name)
    if len(body_ids) == 0:
        raise IndexError(f"no body matching {body_name!r} found on the robot")

    # Component 2 of the last axis is read as the height; this assumes the
    # state vector starts with the world-frame (x, y, z) position -- confirm
    # against the simulator's documentation.
    head_height = robot.data.body_state_w[:, body_ids[0], 2]

    # Lift relative to the base height, normalised to the 0-1 range.
    progress = (head_height - base_height) / (target_height - base_height)
    return torch.clamp(progress, min=0.0, max=1.0)
float, @@ -272,7 +284,14 @@ class T1GetUpRewardCfg: } ) - # 4. 惩罚项 + # 4. 引导机器人“向上看”和“抬起头” + head_lift = RewTerm( + func=linear_head_height_reward, + weight=15.0, + params={"target_height": 1.1, "base_height": 0.15} + ) + + # 5. 惩罚项 undesired_contacts = RewTerm( func=mdp.undesired_contacts, weight=-2.0, @@ -283,14 +302,14 @@ class T1GetUpRewardCfg: } ) - # 5. 抑制跳跃:严厉惩罚向上窜的速度 + # 6. 抑制跳跃:严厉惩罚向上窜的速度 root_vel_z_penalty = RewTerm( func=root_vel_z_l2_local, weight=-1.0, # 增大负权重 params={"asset_cfg": SceneEntityCfg("robot")} ) - # 6. 抑制滞空 (Airtime Penalty) + # 7. 抑制滞空 (Airtime Penalty) feet_airtime = RewTerm( func=strict_feet_contact_reward, weight=-5.0, # 加大权重,跳一下扣的分比站起来得的分还多