From 1fbc9dccac17c5cd7d9ec0a19a8ba74a34897b35 Mon Sep 17 00:00:00 2001
From: ChenXi <chenxiliu2405@gmail.com>
Date: Fri, 20 Mar 2026 09:53:34 -0400
Subject: [PATCH] Tune PPO hyperparameters and add linear head-height reward

---
 rl_game/get_up/config/ppo_cfg.yaml  | 14 +++++++-------
 rl_game/get_up/config/t1_env_cfg.py | 25 ++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml
index a8c9243..65826ad 100644
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -17,8 +17,8 @@ params:
           name: default
         sigma_init:
           name: const_initializer
-          val: 0.8
-        fixed_sigma: True
+          val: 0.7
+        fixed_sigma: False
     mlp:
       units: [512, 256, 128]
       activation: relu
@@ -41,9 +41,9 @@ params:
     normalize_advantage: True
     gamma: 0.99
     tau: 0.95
-    learning_rate: 5e-4
+    learning_rate: 3e-4
     lr_schedule: adaptive
-    kl_threshold: 0.013
+    kl_threshold: 0.008
     score_to_win: 20000
     max_epochs: 500000
     save_best_after: 50
@@ -54,7 +54,7 @@ params:
     bounds_loss_coef: 0.001
     e_clip: 0.2
     horizon_length: 64
-    minibatch_size: 8192
-    mini_epochs: 4
-    critic_coef: 2
+    minibatch_size: 4096
+    mini_epochs: 5
+    critic_coef: 1
     clip_value: True
\ No newline at end of file
diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py
index a1a8e6a..d821458 100644
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -106,6 +106,18 @@ def arm_push_up_reward(
                        pushing_up_bonus,
                        torch.zeros_like(pushing_up_bonus))
 
+
+def linear_head_height_reward(env: ManagerBasedRLEnv, target_height: float, base_height: float = 0.15) -> torch.Tensor:
+    """Linear reward on the height of body "H2": 0 at ``base_height``, 1 at ``target_height``, clamped to [0, 1]."""
+    if target_height <= base_height:
+        # A zero span yields inf/NaN rewards and a negative span inverts the ramp, so fail fast.
+        raise ValueError(f"target_height ({target_height}) must be greater than base_height ({base_height})")
+    head_idx, _ = env.scene["robot"].find_bodies("H2")
+    # Component 2 of the body state is used as the head height (presumably world-frame z; confirm against Isaac Lab).
+    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
+    reward = (current_head_h - base_height) / (target_height - base_height)
+    return torch.clamp(reward, min=0.0, max=1.0)
+
 def is_standing_still(
         env: ManagerBasedRLEnv,
         min_head_height: float,
@@ -272,7 +284,14 @@ class T1GetUpRewardCfg:
         }
     )
 
-    # 4. 惩罚项
+    # 4. Encourage lifting the head: dense linear reward on head height
+    head_lift = RewTerm(
+        func=linear_head_height_reward,
+        weight=15.0,
+        params={"target_height": 1.1, "base_height": 0.15}
+    )
+
+    # 5. 惩罚项
     undesired_contacts = RewTerm(
         func=mdp.undesired_contacts,
         weight=-2.0,
@@ -283,14 +302,14 @@ class T1GetUpRewardCfg:
         }
     )
 
-    # 5. 抑制跳跃：严厉惩罚向上窜的速度
+    # 6. 抑制跳跃：严厉惩罚向上窜的速度
     root_vel_z_penalty = RewTerm(
         func=root_vel_z_l2_local,
         weight=-1.0,  # 增大负权重
         params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
-    # 6. 抑制滞空 (Airtime Penalty)
+    # 7. 抑制滞空 (Airtime Penalty)
     feet_airtime = RewTerm(
         func=strict_feet_contact_reward,
         weight=-5.0,  # 加大权重，跳一下扣的分比站起来得的分还多