From 4833ba33c8e08ad2ffc8fc8e3ba972f954f79323 Mon Sep 17 00:00:00 2001
From: ChenXi <chenxiliu2405@gmail.com>
Date: Sat, 21 Mar 2026 10:16:01 -0400
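Subject: [PATCH] Retune PPO hyperparameters and get-up reward weights

PPO (rl_game/get_up/config/ppo_cfg.yaml):
  - sigma_init val: 0.7 -> 1.2
  - gamma: 0.99 -> 0.96
  - learning_rate: 2e-4 -> 5e-4
  - entropy_coef: 0.005 -> 0.008
  - horizon_length: 32 -> 128
  - minibatch_size: 4096 -> 8192
  - mini_epochs: 5 -> 4

Rewards (rl_game/get_up/config/t1_env_cfg.py, T1GetUpRewardCfg):
  - upright weight: 30.0 -> 5.0
  - height_with_feet weight: 25.0 -> 20.0
  - add time_penalty term (mdp.is_alive, weight -1.2)
  - is_success weight: 300.0 -> 500.0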

---
 rl_game/get_up/config/ppo_cfg.yaml  | 14 +++++++-------
 rl_game/get_up/config/t1_env_cfg.py | 14 ++++++++++----
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml
index a440bf1..5c28ead 100644
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -17,7 +17,7 @@ params:
           name: default
         sigma_init:
           name: const_initializer
-          val: 0.7
+          val: 1.2
         fixed_sigma: False
     mlp:
       units: [512, 256, 128]
@@ -39,9 +39,9 @@ params:
     reward_shaper:
       scale_value: 1.0
     normalize_advantage: True
-    gamma: 0.99
+    gamma: 0.96
     tau: 0.95
-    learning_rate: 2e-4
+    learning_rate: 5e-4
     lr_schedule: adaptive
     kl_threshold: 0.008
     score_to_win: 20000
@@ -49,12 +49,12 @@ params:
     save_best_after: 50
     save_frequency: 100
     grad_norm: 0.5
-    entropy_coef: 0.005
+    entropy_coef: 0.008
     truncate_grads: True
     bounds_loss_coef: 0.001
     e_clip: 0.2
-    horizon_length: 32
-    minibatch_size: 4096
-    mini_epochs: 5
+    horizon_length: 128
+    minibatch_size: 8192
+    mini_epochs: 4
     critic_coef: 1
     clip_value: True
\ No newline at end of file
diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py
index 870f3ab..097f8b5 100644
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -184,12 +184,12 @@ class T1ActionCfg:
 @configclass
 class T1GetUpRewardCfg:
     # 1. 姿态基础奖 (引导身体变正)
-    upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0)
+    upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)
 
     # 2. 【条件高度奖】：双高度判定（头+盆骨），且必须脚踩地
     height_with_feet = RewTerm(
         func=standing_with_feet_reward,
-        weight=25.0,  # 作为核心引导，增加权重
+        weight=20.0,  # core guidance term (weight lowered from 25.0)
         params={
             "min_head_height": 1.10,
             "min_pelvis_height": 0.7,
@@ -224,10 +224,16 @@ class T1GetUpRewardCfg:
         params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
-    # 5. 成功终极大奖
+    # 5. Time penalty (enforces efficiency)
+    time_penalty = RewTerm(
+        func=mdp.is_alive,
+        weight=-1.2
+    )
+
+    # 6. Final large bonus on success
     is_success = RewTerm(
         func=lambda env, keys: env.termination_manager.get_term(keys).float(),
-        weight=300.0,
+        weight=500.0,
         params={"keys": "standing_success"}
     )