From 6d2ad9846a12cefbb29d0a06aa7d9adef933d84a Mon Sep 17 00:00:00 2001 From: ChenXi Date: Fri, 20 Mar 2026 10:51:07 -0400 Subject: [PATCH] tune PPO hyperparameters and get-up reward/env settings --- rl_game/get_up/config/ppo_cfg.yaml | 8 ++++---- rl_game/get_up/config/t1_env_cfg.py | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index 65826ad..d896088 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -41,19 +41,19 @@ params: normalize_advantage: True gamma: 0.99 tau: 0.95 - learning_rate: 3e-4 + learning_rate: 2e-4 lr_schedule: adaptive kl_threshold: 0.008 score_to_win: 20000 max_epochs: 500000 save_best_after: 50 save_frequency: 100 - grad_norm: 1.0 - entropy_coef: 0.05 + grad_norm: 0.5 + entropy_coef: 0.005 truncate_grads: True bounds_loss_coef: 0.001 e_clip: 0.2 - horizon_length: 64 + horizon_length: 32 minibatch_size: 4096 mini_epochs: 5 critic_coef: 1 diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index d821458..2998b30 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -1,5 +1,4 @@ import random - import numpy import numpy as np import torch @@ -343,7 +342,7 @@ class T1GetUpRewardCfg: # 7. 成功终极大奖 is_success = RewTerm( func=lambda env, keys: env.termination_manager.get_term(keys), - weight=1000.0, + weight=300.0, params={"keys": "standing_success"} ) @@ -383,4 +382,4 @@ class T1EnvCfg(ManagerBasedRLEnvCfg): actions = T1ActionCfg() episode_length_s = 6.0 - decimation = 4 \ No newline at end of file + decimation = 2 \ No newline at end of file