Change parameters: retune PPO hyperparameters and rebalance get-up reward weights

2026-03-21 10:16:01 -04:00
parent fd8238dc41
commit 4833ba33c8
2 changed files with 17 additions and 11 deletions
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -17,7 +17,7 @@ params:
          name: default
        sigma_init:
          name: const_initializer
-          val: 0.7
+          val: 1.2
        fixed_sigma: False
    mlp:
      units: [512, 256, 128]
@@ -39,9 +39,9 @@ params:
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
-    gamma: 0.99
+    gamma: 0.96
    tau: 0.95
-    learning_rate: 2e-4
+    learning_rate: 5e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
@@ -49,12 +49,12 @@ params:
    save_best_after: 50
    save_frequency: 100
    grad_norm: 0.5
-    entropy_coef: 0.005
+    entropy_coef: 0.008
    truncate_grads: True
    bounds_loss_coef: 0.001
    e_clip: 0.2
-    horizon_length: 32
+    horizon_length: 128
-    minibatch_size: 4096
+    minibatch_size: 8192
-    mini_epochs: 5
+    mini_epochs: 4
    critic_coef: 1
    clip_value: True
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -184,12 +184,12 @@ class T1ActionCfg:
@configclass
 class T1GetUpRewardCfg:
    # 1. 姿态基础奖 (引导身体变正)
-    upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0)
+    upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)
    # 2. 【条件高度奖】：双高度判定（头+盆骨），且必须脚踩地
    height_with_feet = RewTerm(
        func=standing_with_feet_reward,
-        weight=25.0,  # 作为核心引导，增加权重
+        weight=20.0,  # core guidance term (weight lowered from 25.0)
        params={
            "min_head_height": 1.10,
            "min_pelvis_height": 0.7,
@@ -224,10 +224,16 @@ class T1GetUpRewardCfg:
        params={"asset_cfg": SceneEntityCfg("robot")}
    )
-    # 5. 成功终极大奖
+    # 5. 时间惩罚 (强制效率)
    time_penalty = RewTerm(
        func=mdp.is_alive,
        weight=-1.2
    )
    # 6. 成功终极大奖
    is_success = RewTerm(
        func=lambda env, keys: env.termination_manager.get_term(keys).float(),
-        weight=300.0,
+        weight=500.0,
        params={"keys": "standing_success"}
    )