Tune training hyperparameters and T1 get-up reward weights; add a per-step time penalty

This commit is contained in:
2026-03-21 10:16:01 -04:00
parent fd8238dc41
commit 4833ba33c8
2 changed files with 17 additions and 11 deletions

View File

@@ -17,7 +17,7 @@ params:
name: default name: default
sigma_init: sigma_init:
name: const_initializer name: const_initializer
val: 0.7 val: 1.2
fixed_sigma: False fixed_sigma: False
mlp: mlp:
units: [512, 256, 128] units: [512, 256, 128]
@@ -39,9 +39,9 @@ params:
reward_shaper: reward_shaper:
scale_value: 1.0 scale_value: 1.0
normalize_advantage: True normalize_advantage: True
gamma: 0.99 gamma: 0.96
tau: 0.95 tau: 0.95
learning_rate: 2e-4 learning_rate: 5e-4
lr_schedule: adaptive lr_schedule: adaptive
kl_threshold: 0.008 kl_threshold: 0.008
score_to_win: 20000 score_to_win: 20000
@@ -49,12 +49,12 @@ params:
save_best_after: 50 save_best_after: 50
save_frequency: 100 save_frequency: 100
grad_norm: 0.5 grad_norm: 0.5
entropy_coef: 0.005 entropy_coef: 0.008
truncate_grads: True truncate_grads: True
bounds_loss_coef: 0.001 bounds_loss_coef: 0.001
e_clip: 0.2 e_clip: 0.2
horizon_length: 32 horizon_length: 128
minibatch_size: 4096 minibatch_size: 8192
mini_epochs: 5 mini_epochs: 4
critic_coef: 1 critic_coef: 1
clip_value: True clip_value: True

View File

@@ -184,12 +184,12 @@ class T1ActionCfg:
@configclass @configclass
class T1GetUpRewardCfg: class T1GetUpRewardCfg:
# 1. 姿态基础奖 (引导身体变正) # 1. 姿态基础奖 (引导身体变正)
upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0) upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)
# 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地 # 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地
height_with_feet = RewTerm( height_with_feet = RewTerm(
func=standing_with_feet_reward, func=standing_with_feet_reward,
weight=25.0, # 作为核心引导,增加权重 weight=20.0, # 作为核心引导,增加权重
params={ params={
"min_head_height": 1.10, "min_head_height": 1.10,
"min_pelvis_height": 0.7, "min_pelvis_height": 0.7,
@@ -224,10 +224,16 @@ class T1GetUpRewardCfg:
params={"asset_cfg": SceneEntityCfg("robot")} params={"asset_cfg": SceneEntityCfg("robot")}
) )
# 5. 成功终极大奖 # 5. 时间惩罚 (强制效率)
time_penalty = RewTerm(
func=mdp.is_alive,
weight=-1.2
)
# 6. 成功终极大奖
is_success = RewTerm( is_success = RewTerm(
func=lambda env, keys: env.termination_manager.get_term(keys).float(), func=lambda env, keys: env.termination_manager.get_term(keys).float(),
weight=300.0, weight=500.0,
params={"keys": "standing_success"} params={"keys": "standing_success"}
) )