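Change parameters: retune PPO hyperparameters (sigma_init, gamma, learning_rate, entropy_coef, horizon_length, minibatch_size, mini_epochs), rebalance T1 get-up reward weights, and add a time penalty term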
This commit is contained in:
@@ -17,7 +17,7 @@ params:
|
|||||||
name: default
|
name: default
|
||||||
sigma_init:
|
sigma_init:
|
||||||
name: const_initializer
|
name: const_initializer
|
||||||
val: 0.7
|
val: 1.2
|
||||||
fixed_sigma: False
|
fixed_sigma: False
|
||||||
mlp:
|
mlp:
|
||||||
units: [512, 256, 128]
|
units: [512, 256, 128]
|
||||||
@@ -39,9 +39,9 @@ params:
|
|||||||
reward_shaper:
|
reward_shaper:
|
||||||
scale_value: 1.0
|
scale_value: 1.0
|
||||||
normalize_advantage: True
|
normalize_advantage: True
|
||||||
gamma: 0.99
|
gamma: 0.96
|
||||||
tau: 0.95
|
tau: 0.95
|
||||||
learning_rate: 2e-4
|
learning_rate: 5e-4
|
||||||
lr_schedule: adaptive
|
lr_schedule: adaptive
|
||||||
kl_threshold: 0.008
|
kl_threshold: 0.008
|
||||||
score_to_win: 20000
|
score_to_win: 20000
|
||||||
@@ -49,12 +49,12 @@ params:
|
|||||||
save_best_after: 50
|
save_best_after: 50
|
||||||
save_frequency: 100
|
save_frequency: 100
|
||||||
grad_norm: 0.5
|
grad_norm: 0.5
|
||||||
entropy_coef: 0.005
|
entropy_coef: 0.008
|
||||||
truncate_grads: True
|
truncate_grads: True
|
||||||
bounds_loss_coef: 0.001
|
bounds_loss_coef: 0.001
|
||||||
e_clip: 0.2
|
e_clip: 0.2
|
||||||
horizon_length: 32
|
horizon_length: 128
|
||||||
minibatch_size: 4096
|
minibatch_size: 8192
|
||||||
mini_epochs: 5
|
mini_epochs: 4
|
||||||
critic_coef: 1
|
critic_coef: 1
|
||||||
clip_value: True
|
clip_value: True
|
||||||
@@ -184,12 +184,12 @@ class T1ActionCfg:
|
|||||||
@configclass
|
@configclass
|
||||||
class T1GetUpRewardCfg:
|
class T1GetUpRewardCfg:
|
||||||
# 1. 姿态基础奖 (引导身体变正)
|
# 1. 姿态基础奖 (引导身体变正)
|
||||||
upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0)
|
upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)
|
||||||
|
|
||||||
# 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地
|
# 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地
|
||||||
height_with_feet = RewTerm(
|
height_with_feet = RewTerm(
|
||||||
func=standing_with_feet_reward,
|
func=standing_with_feet_reward,
|
||||||
weight=25.0, # 作为核心引导,增加权重
|
weight=20.0, # 作为核心引导,增加权重
|
||||||
params={
|
params={
|
||||||
"min_head_height": 1.10,
|
"min_head_height": 1.10,
|
||||||
"min_pelvis_height": 0.7,
|
"min_pelvis_height": 0.7,
|
||||||
@@ -224,10 +224,16 @@ class T1GetUpRewardCfg:
|
|||||||
params={"asset_cfg": SceneEntityCfg("robot")}
|
params={"asset_cfg": SceneEntityCfg("robot")}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 5. 成功终极大奖
|
# 5. 时间惩罚 (强制效率)
|
||||||
|
time_penalty = RewTerm(
|
||||||
|
func=mdp.is_alive,
|
||||||
|
weight=-1.2
|
||||||
|
)
|
||||||
|
|
||||||
|
# 6. 成功终极大奖
|
||||||
is_success = RewTerm(
|
is_success = RewTerm(
|
||||||
func=lambda env, keys: env.termination_manager.get_term(keys).float(),
|
func=lambda env, keys: env.termination_manager.get_term(keys).float(),
|
||||||
weight=300.0,
|
weight=500.0,
|
||||||
params={"keys": "standing_success"}
|
params={"keys": "standing_success"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user