From 6d2ad9846a12cefbb29d0a06aa7d9adef933d84a Mon Sep 17 00:00:00 2001 From: ChenXi Date: Fri, 20 Mar 2026 10:51:07 -0400 Subject: [PATCH] tune PPO hyperparameters and get-up reward/env settings --- rl_game/get_up/config/ppo_cfg.yaml | 8 ++++---- rl_game/get_up/config/t1_env_cfg.py | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index 65826ad..d896088 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -41,19 +41,19 @@ params: normalize_advantage: True gamma: 0.99 tau: 0.95 - learning_rate: 3e-4 + learning_rate: 2e-4 lr_schedule: adaptive kl_threshold: 0.008 score_to_win: 20000 max_epochs: 500000 save_best_after: 50 save_frequency: 100 - grad_norm: 1.0 - entropy_coef: 0.05 + grad_norm: 0.5 + entropy_coef: 0.005 truncate_grads: True bounds_loss_coef: 0.001 e_clip: 0.2 - horizon_length: 64 + horizon_length: 32 minibatch_size: 4096 mini_epochs: 5 critic_coef: 1 diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index d821458..2998b30 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -1,5 +1,4 @@ import random - import numpy import numpy as np import torch @@ -343,7 +342,7 @@ class T1GetUpRewardCfg: # 7. 成功终极大奖 is_success = RewTerm( func=lambda env, keys: env.termination_manager.get_term(keys), - weight=1000.0, + weight=300.0, params={"keys": "standing_success"} ) @@ -383,4 +382,4 @@ class T1EnvCfg(ManagerBasedRLEnvCfg): actions = T1ActionCfg() episode_length_s = 6.0 - decimation = 4 \ No newline at end of file + decimation = 2 \ No newline at end of file