From 72a22bd78a63a7911458de07bf7c80be36a3ebf7 Mon Sep 17 00:00:00 2001
From: ChenXi
Date: Sat, 21 Mar 2026 08:38:17 -0400
Subject: [PATCH] change arm to push the ground reward function

---
 rl_game/get_up/config/ppo_cfg.yaml  |  2 +-
 rl_game/get_up/config/t1_env_cfg.py | 61 +++++++++++++++++------------
 2 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml
index d896088..a440bf1 100644
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -45,7 +45,7 @@ params:
     lr_schedule: adaptive
     kl_threshold: 0.008
     score_to_win: 20000
-    max_epochs: 500000
+    max_epochs: 1000000
     save_best_after: 50
     save_frequency: 100
     grad_norm: 0.5
diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py
index 5f13245..ab00a58 100644
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -48,47 +48,54 @@ def standing_with_feet_reward(
 
     return combined_reward
 
+
 def arm_push_up_reward(
     env: ManagerBasedRLEnv,
     sensor_cfg: SceneEntityCfg,
-    height_threshold: float = 0.5,
-    min_force: float = 20.0
+    height_threshold: float = 0.6,
+    min_force: float = 2.0  # 大幅降低门槛:只要有 2N 的力就说明碰到了
 ) -> torch.Tensor:
-    """
-    强化版手臂撑地奖励:
-    1. 鼓励手臂产生超过阈值的垂直反作用力。
-    2. 当手臂用力且躯干有向上速度时,给予额外加成。
-    """
-    # 获取手臂传感器数据
     contact_sensor = env.scene.sensors.get(sensor_cfg.name)
     if contact_sensor is None:
         return torch.zeros(env.num_envs, device=env.device)
 
-    # 1. 获取手臂 Z 轴受力 (取所有手臂 Body 的合力或最大力)
+    # 1. 获取手臂受力
+    # 使用 net_forces_w 的范数或 Z 分量
     arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
     max_arm_force = torch.max(arm_forces_z, dim=-1)[0]
 
-    # 归一化受力奖励:在 20N 到 100N 之间线性增长
-    force_reward = torch.clamp((max_arm_force - min_force) / 80.0, min=0.0, max=1.0)
+    # 2. 核心修改:将奖励分为“接触奖”和“撑地奖”
+    # 接触奖:只要碰到了就给 0.2 的基础分
+    is_contact = (max_arm_force > 0.1).float()
 
-    # 2. 获取躯干高度和垂直速度
+    # 撑地奖:力在 2N 到 50N 之间线性增长 (50N 对于支撑足够了)
+    force_reward = torch.clamp((max_arm_force - min_force) / 48.0, min=0.0, max=1.0)
+
+    # 组合:有接触就有基础分,力越大加分越多
+    total_force_score = is_contact * 0.2 + force_reward * 0.8
+
+    # 3. 协同奖励:躯干高度和速度
     pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
     current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
     root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
 
-    # 3. 协同奖励:当手臂在用力推,且躯干正在上升时,给高分
-    # 只有在高度低于阈值(还在撑起阶段)时生效
+    # 只要在撑,且躯干在往上走,就大幅加成
     pushing_up_bonus = torch.where(
-        (max_arm_force > min_force) & (root_vel_z > 0.05),
-        force_reward * (1.0 + root_vel_z * 2.0),  # 速度越快奖励越高
-        force_reward
+        (max_arm_force > min_force) & (root_vel_z > 0.02),
+        total_force_score * (1.0 + torch.clamp(root_vel_z * 5.0, max=2.0)),
+        total_force_score
     )
 
-    # 只有在躯干较低时才发放此奖励
+    # 4. 这里的门槛要严格:一旦站得比较高了(比如 0.6m),就停止对手臂的奖励,
+    # 强迫它把重心转移到腿部
     return torch.where(current_height < height_threshold, pushing_up_bonus, torch.zeros_like(pushing_up_bonus))
 
 
+def torso_pitch_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    proj_gravity = env.scene[asset_cfg.name].data.projected_gravity_b
+    return torch.square(proj_gravity[:, 0])
+
+
 def is_standing_still(
     env: ManagerBasedRLEnv,
     min_head_height: float,
@@ -185,7 +192,7 @@ class T1ActionCfg:
 @configclass
 class T1GetUpRewardCfg:
     # 1. 姿态基础奖 (引导身体变正)
-    upright = RewTerm(func=mdp.flat_orientation_l2, weight=10.0)
+    upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0)
 
     # 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地
     height_with_feet = RewTerm(
@@ -203,20 +210,26 @@ class T1GetUpRewardCfg:
     # 3. 手臂撑地奖:辅助脱离地面阶段
     arm_push_support = RewTerm(
         func=arm_push_up_reward,
-        weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
+        weight=45.0, # 显著增加权重(从 15.0 提到 45.0),让它成为起步的关键
         params={
             "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
             "height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力
         }
     )
 
-    # 4. 关节限位惩罚 (新增:防止关节撞死导致数值问题)
     joint_limits = RewTerm(
         func=mdp.joint_pos_limits,
         weight=-1.0,
         params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
+    # 4. 新增:躯干仰角奖 (核心诱导)
+    torso_pitch = RewTerm(
+        func=torso_pitch_reward, # 或者使用 orientation 相关的项
+        weight=15.0,
+        params={"asset_cfg": SceneEntityCfg("robot")}
+    )
+
     # 5. 成功终极大奖
     is_success = RewTerm(
         func=lambda env, keys: env.termination_manager.get_term(keys).float(),
@@ -240,9 +253,9 @@ class T1GetUpTerminationsCfg:
         params={
             "min_head_height": 1.05,
             "min_pelvis_height": 0.75,
-            "max_angle_error": 0.15,
-            "standing_time": 0.8,
-            "velocity_threshold": 0.15
+            "max_angle_error": 0.3,
+            "standing_time": 0.2,
+            "velocity_threshold": 0.5
         }
     )