change parameter
This commit is contained in:
@@ -17,8 +17,8 @@ params:
|
||||
name: default
|
||||
sigma_init:
|
||||
name: const_initializer
|
||||
val: 0.8
|
||||
fixed_sigma: True
|
||||
val: 0.7
|
||||
fixed_sigma: False
|
||||
mlp:
|
||||
units: [512, 256, 128]
|
||||
activation: relu
|
||||
@@ -41,9 +41,9 @@ params:
|
||||
normalize_advantage: True
|
||||
gamma: 0.99
|
||||
tau: 0.95
|
||||
learning_rate: 5e-4
|
||||
learning_rate: 3e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.013
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 500000
|
||||
save_best_after: 50
|
||||
@@ -54,7 +54,7 @@ params:
|
||||
bounds_loss_coef: 0.001
|
||||
e_clip: 0.2
|
||||
horizon_length: 64
|
||||
minibatch_size: 8192
|
||||
mini_epochs: 4
|
||||
critic_coef: 2
|
||||
minibatch_size: 4096
|
||||
mini_epochs: 5
|
||||
critic_coef: 1
|
||||
clip_value: True
|
||||
@@ -106,6 +106,18 @@ def arm_push_up_reward(
|
||||
pushing_up_bonus,
|
||||
torch.zeros_like(pushing_up_bonus))
|
||||
|
||||
|
||||
def linear_head_height_reward(env: ManagerBasedRLEnv, target_height: float, base_height: float = 0.15) -> torch.Tensor:
    """Linear reward for raising the head from ``base_height`` toward ``target_height``.

    The reward is the fraction of the way the head has travelled from
    ``base_height`` to ``target_height``, clamped to ``[0, 1]``.

    Args:
        env: Environment whose ``scene["robot"]`` provides ``find_bodies`` and
            ``data.body_state_w``.
        target_height: Head height at which the reward saturates at 1.0.
        base_height: Head height at (or below) which the reward is 0.0.

    Returns:
        One reward value per entry along the first axis of ``body_state_w``
        (presumably one per environment -- confirm against the caller).

    Raises:
        ValueError: If ``target_height`` equals ``base_height``; the
            normalisation would divide by zero and produce inf/NaN rewards.
    """
    height_span = target_height - base_height
    if height_span == 0:
        # A zero span turns the division below into inf/NaN, which clamp()
        # does not fully remove; fail loudly instead of corrupting the reward.
        raise ValueError(
            "target_height must differ from base_height "
            f"(got target_height={target_height}, base_height={base_height})"
        )

    robot = env.scene["robot"]
    # Look up the body named "H2" (used as the head link here).
    head_idx, _ = robot.find_bodies("H2")
    # Component 2 of the body state is read as the head height
    # (assumed to be world-frame z -- confirm against the asset data layout).
    current_head_h = robot.data.body_state_w[:, head_idx[0], 2]

    # Lift above the base height, normalised so the target maps to 1.0.
    reward = (current_head_h - base_height) / height_span
    return torch.clamp(reward, min=0.0, max=1.0)
|
||||
|
||||
def is_standing_still(
|
||||
env: ManagerBasedRLEnv,
|
||||
min_head_height: float,
|
||||
@@ -272,7 +284,14 @@ class T1GetUpRewardCfg:
|
||||
}
|
||||
)
|
||||
|
||||
# 4. 惩罚项
|
||||
# 4. 引导机器人“向上看”和“抬起头”
|
||||
head_lift = RewTerm(
|
||||
func=linear_head_height_reward,
|
||||
weight=15.0,
|
||||
params={"target_height": 1.1, "base_height": 0.15}
|
||||
)
|
||||
|
||||
# 5. 惩罚项
|
||||
undesired_contacts = RewTerm(
|
||||
func=mdp.undesired_contacts,
|
||||
weight=-2.0,
|
||||
@@ -283,14 +302,14 @@ class T1GetUpRewardCfg:
|
||||
}
|
||||
)
|
||||
|
||||
# 5. 抑制跳跃:严厉惩罚向上窜的速度
|
||||
# 6. 抑制跳跃:严厉惩罚向上窜的速度
|
||||
root_vel_z_penalty = RewTerm(
|
||||
func=root_vel_z_l2_local,
|
||||
weight=-1.0, # 增大负权重
|
||||
params={"asset_cfg": SceneEntityCfg("robot")}
|
||||
)
|
||||
|
||||
# 6. 抑制滞空 (Airtime Penalty)
|
||||
# 7. 抑制滞空 (Airtime Penalty)
|
||||
feet_airtime = RewTerm(
|
||||
func=strict_feet_contact_reward,
|
||||
weight=-5.0, # 加大权重,跳一下扣的分比站起来得的分还多
|
||||
|
||||
Reference in New Issue
Block a user