Tune PPO hyperparameters and add a linear head-height reward

2026-03-20 09:53:34 -04:00
parent 49da77db51
commit 1fbc9dccac
2 changed files with 29 additions and 10 deletions
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -17,8 +17,8 @@ params:
          name: default
        sigma_init:
          name: const_initializer
-          val: 0.8
-        fixed_sigma: True
+          val: 0.7
+        fixed_sigma: False
    mlp:
      units: [512, 256, 128]
      activation: relu
@@ -41,9 +41,9 @@ params:
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
-    learning_rate: 5e-4
+    learning_rate: 3e-4
    lr_schedule: adaptive
-    kl_threshold: 0.013
+    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 500000
    save_best_after: 50
@@ -54,7 +54,7 @@ params:
    bounds_loss_coef: 0.001
    e_clip: 0.2
    horizon_length: 64
-    minibatch_size: 8192
-    mini_epochs: 4
-    critic_coef: 2
+    minibatch_size: 4096
+    mini_epochs: 5
+    critic_coef: 1
    clip_value: True
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -106,6 +106,18 @@ def arm_push_up_reward(
                       pushing_up_bonus,
                       torch.zeros_like(pushing_up_bonus))

+
+def linear_head_height_reward(env: ManagerBasedRLEnv, target_height: float, base_height: float = 0.15) -> torch.Tensor:
+    """
+    Linear reward for the height of the head body ("H2"): 0 at base_height, 1 at target_height, clamped to [0, 1].
+    """
+    head_idx, _ = env.scene["robot"].find_bodies("H2")
+    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]  # index 2: presumably world z -- TODO confirm
+
+    # Lift above base_height as a fraction of the base-to-target span. NOTE(review): target_height == base_height makes the divisor zero.
+    reward = (current_head_h - base_height) / (target_height - base_height)
+    return torch.clamp(reward, min=0.0, max=1.0)
+
 def is_standing_still(
        env: ManagerBasedRLEnv,
        min_head_height: float,
@@ -272,7 +284,14 @@ class T1GetUpRewardCfg:
        }
    )

-    # 4. 惩罚项
+    # 4. Guide the robot to raise its head (linear reward on head height)
+    head_lift = RewTerm(
+        func=linear_head_height_reward,
+        weight=15.0,
+        params={"target_height": 1.1, "base_height": 0.15}
+    )
+
+    # 5. 惩罚项
    undesired_contacts = RewTerm(
        func=mdp.undesired_contacts,
        weight=-2.0,
@@ -283,14 +302,14 @@ class T1GetUpRewardCfg:
        }
    )

-    # 5. 抑制跳跃：严厉惩罚向上窜的速度
+    # 6. 抑制跳跃：严厉惩罚向上窜的速度
    root_vel_z_penalty = RewTerm(
        func=root_vel_z_l2_local,
        weight=-1.0,  # 增大负权重
        params={"asset_cfg": SceneEntityCfg("robot")}
    )

-    # 6. 抑制滞空 (Airtime Penalty)
+    # 7. 抑制滞空 (Airtime Penalty)
    feet_airtime = RewTerm(
        func=strict_feet_contact_reward,
        weight=-5.0,  # 加大权重，跳一下扣的分比站起来得的分还多