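Change parameters: tune PPO hyperparameters (learning_rate 3e-4 -> 2e-4, grad_norm 1.0 -> 0.5, entropy_coef 0.05 -> 0.005, horizon_length 64 -> 32), reduce is_success reward weight 1000.0 -> 300.0, and lower env decimation 4 -> 2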

2026-03-20 10:51:07 -04:00
parent 1fbc9dccac
commit 6d2ad9846a
2 changed files with 6 additions and 7 deletions
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -41,19 +41,19 @@ params:
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
-    learning_rate: 3e-4
+    learning_rate: 2e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 500000
    save_best_after: 50
    save_frequency: 100
-    grad_norm: 1.0
+    grad_norm: 0.5
-    entropy_coef: 0.05
+    entropy_coef: 0.005
    truncate_grads: True
    bounds_loss_coef: 0.001
    e_clip: 0.2
-    horizon_length: 64
+    horizon_length: 32
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 1
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -1,5 +1,4 @@
 import random
 import numpy
 import numpy as np
 import torch
@@ -343,7 +342,7 @@ class T1GetUpRewardCfg:
    # 7. 成功终极大奖
    is_success = RewTerm(
        func=lambda env, keys: env.termination_manager.get_term(keys),
-        weight=1000.0,
+        weight=300.0,
        params={"keys": "standing_success"}
    )
@@ -383,4 +382,4 @@ class T1EnvCfg(ManagerBasedRLEnvCfg):
    actions = T1ActionCfg()
    episode_length_s = 6.0
-    decimation = 4
+    decimation = 2