From 4833ba33c8e08ad2ffc8fc8e3ba972f954f79323 Mon Sep 17 00:00:00 2001 From: ChenXi Date: Sat, 21 Mar 2026 10:16:01 -0400 Subject: [PATCH] change parameter --- rl_game/get_up/config/ppo_cfg.yaml | 14 +++++++------- rl_game/get_up/config/t1_env_cfg.py | 14 ++++++++++---- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index a440bf1..5c28ead 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -17,7 +17,7 @@ params: name: default sigma_init: name: const_initializer - val: 0.7 + val: 1.2 fixed_sigma: False mlp: units: [512, 256, 128] @@ -39,9 +39,9 @@ params: reward_shaper: scale_value: 1.0 normalize_advantage: True - gamma: 0.99 + gamma: 0.96 tau: 0.95 - learning_rate: 2e-4 + learning_rate: 5e-4 lr_schedule: adaptive kl_threshold: 0.008 score_to_win: 20000 @@ -49,12 +49,12 @@ params: save_best_after: 50 save_frequency: 100 grad_norm: 0.5 - entropy_coef: 0.005 + entropy_coef: 0.008 truncate_grads: True bounds_loss_coef: 0.001 e_clip: 0.2 - horizon_length: 32 - minibatch_size: 4096 - mini_epochs: 5 + horizon_length: 128 + minibatch_size: 8192 + mini_epochs: 4 critic_coef: 1 clip_value: True \ No newline at end of file diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index 870f3ab..097f8b5 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -184,12 +184,12 @@ class T1ActionCfg: @configclass class T1GetUpRewardCfg: # 1. 姿态基础奖 (引导身体变正) - upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0) + upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0) # 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地 height_with_feet = RewTerm( func=standing_with_feet_reward, - weight=25.0, # 作为核心引导,增加权重 + weight=20.0, # 作为核心引导,增加权重 params={ "min_head_height": 1.10, "min_pelvis_height": 0.7, @@ -224,10 +224,16 @@ class T1GetUpRewardCfg: params={"asset_cfg": SceneEntityCfg("robot")} ) - # 5. 成功终极大奖 + # 5. 时间惩罚 (强制效率) + time_penalty = RewTerm( + func=mdp.is_alive, + weight=-1.2 + ) + + # 6. 成功终极大奖 is_success = RewTerm( func=lambda env, keys: env.termination_manager.get_term(keys).float(), - weight=300.0, + weight=500.0, params={"keys": "standing_success"} )