change parameter

This commit is contained in:
2026-03-20 09:53:34 -04:00
parent 49da77db51
commit 1fbc9dccac
2 changed files with 29 additions and 10 deletions

View File

@@ -17,8 +17,8 @@ params:
name: default
sigma_init:
name: const_initializer
val: 0.8
fixed_sigma: True
val: 0.7
fixed_sigma: False
mlp:
units: [512, 256, 128]
activation: relu
@@ -41,9 +41,9 @@ params:
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 5e-4
learning_rate: 3e-4
lr_schedule: adaptive
kl_threshold: 0.013
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 500000
save_best_after: 50
@@ -54,7 +54,7 @@ params:
bounds_loss_coef: 0.001
e_clip: 0.2
horizon_length: 64
minibatch_size: 8192
mini_epochs: 4
critic_coef: 2
minibatch_size: 4096
mini_epochs: 5
critic_coef: 1
clip_value: True

View File

@@ -106,6 +106,18 @@ def arm_push_up_reward(
pushing_up_bonus,
torch.zeros_like(pushing_up_bonus))
def linear_head_height_reward(env: ManagerBasedRLEnv, target_height: float, base_height: float = 0.15) -> torch.Tensor:
    """Linear reward for lifting the robot's head from the ground toward a target height.

    The reward ramps linearly from 0 (head at ``base_height``) to 1 (head at or
    above ``target_height``) and is clamped to [0, 1], so dropping below the
    resting height is never penalized and overshooting is never over-rewarded.

    Args:
        env: Environment whose scene contains a "robot" entity with an "H2"
            head body (Isaac Lab ``ManagerBasedRLEnv`` — assumed; confirm against caller).
        target_height: World-frame z height (meters) at which the reward saturates at 1.
        base_height: Resting head height near the ground that maps to reward 0.

    Returns:
        Per-environment reward tensor with values in [0, 1].
    """
    head_idx, _ = env.scene["robot"].find_bodies("H2")
    # World-frame z coordinate of the head body for every environment instance.
    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
    # Normalize the lift above the resting height to [0, 1].
    # Guard against a zero or negative span to avoid division by zero
    # (original code divided unconditionally).
    span = target_height - base_height
    if span <= 0.0:
        return torch.zeros_like(current_head_h)
    reward = (current_head_h - base_height) / span
    return torch.clamp(reward, min=0.0, max=1.0)
def is_standing_still(
env: ManagerBasedRLEnv,
min_head_height: float,
@@ -272,7 +284,14 @@ class T1GetUpRewardCfg:
}
)
# 4. 惩罚项
# 4. 引导机器人“向上看”和“抬起头”
head_lift = RewTerm(
func=linear_head_height_reward,
weight=15.0,
params={"target_height": 1.1, "base_height": 0.15}
)
# 5. 惩罚项
undesired_contacts = RewTerm(
func=mdp.undesired_contacts,
weight=-2.0,
@@ -283,14 +302,14 @@ class T1GetUpRewardCfg:
}
)
# 5. 抑制跳跃:严厉惩罚向上窜的速度
# 6. 抑制跳跃:严厉惩罚向上窜的速度
root_vel_z_penalty = RewTerm(
func=root_vel_z_l2_local,
weight=-1.0, # 增大负权重
params={"asset_cfg": SceneEntityCfg("robot")}
)
# 6. 抑制滞空 (Airtime Penalty)
# 7. 抑制滞空 (Airtime Penalty)
feet_airtime = RewTerm(
func=strict_feet_contact_reward,
weight=-5.0, # 加大权重,跳一下扣的分比站起来得的分还多