change parameter
@@ -17,8 +17,8 @@ params:
           name: default
         sigma_init:
           name: const_initializer
-          val: 0.8
-        fixed_sigma: True
+          val: 0.7
+        fixed_sigma: False
     mlp:
       units: [512, 256, 128]
       activation: relu
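For context (not part of the commit): in rl_games-style network configs, `sigma_init` supplies the constant used to initialize the policy's (log-)std, and `fixed_sigma` typically selects between a state-independent learnable sigma (`True`) and a sigma predicted by the network per observation (`False`). A minimal sketch of the two variants, assuming a plain PyTorch head; `GaussianPolicyHead` and its arguments are illustrative names, not from this repo:

import torch
import torch.nn as nn

class GaussianPolicyHead(nn.Module):
    """Illustrative only: state-independent vs. state-dependent log-std."""

    def __init__(self, hidden_dim: int, num_actions: int, fixed_sigma: bool, sigma_init_val: float):
        super().__init__()
        self.fixed_sigma = fixed_sigma
        self.mu = nn.Linear(hidden_dim, num_actions)
        if fixed_sigma:
            # One learnable log-std per action, shared across all states,
            # initialized to the configured constant (cf. const_initializer val).
            self.log_std = nn.Parameter(torch.full((num_actions,), sigma_init_val))
        else:
            # Log-std predicted from the same features as the mean.
            self.log_std_layer = nn.Linear(hidden_dim, num_actions)
            nn.init.zeros_(self.log_std_layer.weight)
            nn.init.constant_(self.log_std_layer.bias, sigma_init_val)

    def forward(self, features: torch.Tensor):
        mu = self.mu(features)
        log_std = self.log_std if self.fixed_sigma else self.log_std_layer(features)
        return mu, torch.exp(log_std)

With `fixed_sigma: False` the exploration noise becomes observation-dependent, and lowering `val` from 0.8 to 0.7 starts the policy with slightly less initial exploration.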
@@ -41,9 +41,9 @@ params:
     normalize_advantage: True
     gamma: 0.99
     tau: 0.95
-    learning_rate: 5e-4
+    learning_rate: 3e-4
     lr_schedule: adaptive
-    kl_threshold: 0.013
+    kl_threshold: 0.008
     score_to_win: 20000
     max_epochs: 500000
     save_best_after: 50
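Also outside the diff itself: with `lr_schedule: adaptive`, the learning rate is usually adjusted on the fly from the measured policy KL divergence against `kl_threshold`, so dropping the threshold from 0.013 to 0.008 makes the scheduler rein in the step size earlier. A rough sketch of such a rule; the 2.0/1.5 factors and the LR bounds are illustrative, not values from this config:

def adaptive_lr(lr: float, kl: float, kl_threshold: float = 0.008,
                min_lr: float = 1e-6, max_lr: float = 1e-2) -> float:
    """Shrink the LR when the last update moved the policy too far, grow it when it barely moved."""
    if kl > 2.0 * kl_threshold:
        lr = max(lr / 1.5, min_lr)
    elif kl < 0.5 * kl_threshold:
        lr = min(lr * 1.5, max_lr)
    return lr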
@@ -54,7 +54,7 @@ params:
     bounds_loss_coef: 0.001
     e_clip: 0.2
     horizon_length: 64
-    minibatch_size: 8192
-    mini_epochs: 4
-    critic_coef: 2
+    minibatch_size: 4096
+    mini_epochs: 5
+    critic_coef: 1
     clip_value: True
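The minibatch change interacts with `horizon_length` and the number of parallel environments, which is not part of this diff: each rollout gathers num_envs * horizon_length transitions, splits them into minibatch_size chunks, and replays them mini_epochs times. A quick back-of-the-envelope check, assuming 4096 environments (an assumption, not a value from this commit):

num_envs = 4096          # assumption; not part of this commit
horizon_length = 64
minibatch_size = 4096    # was 8192
mini_epochs = 5          # was 4

batch_size = num_envs * horizon_length                          # 262144 transitions per rollout
minibatches_per_epoch = batch_size // minibatch_size            # 64 (was 32)
grad_updates_per_rollout = minibatches_per_epoch * mini_epochs  # 320 (was 128)

Most PPO implementations, rl_games included, typically expect the rollout batch to divide evenly by `minibatch_size`.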
@@ -106,6 +106,18 @@ def arm_push_up_reward(
         pushing_up_bonus,
         torch.zeros_like(pushing_up_bonus))
 
+
+def linear_head_height_reward(env: ManagerBasedRLEnv, target_height: float, base_height: float = 0.15) -> torch.Tensor:
+    """
+    Compute a linear incremental reward for raising the head from the ground toward the target height.
+    """
+    head_idx, _ = env.scene["robot"].find_bodies("H2")
+    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
+
+    # Compute the lift relative to the ground and normalize it to the range 0-1.
+    reward = (current_head_h - base_height) / (target_height - base_height)
+    return torch.clamp(reward, min=0.0, max=1.0)
+
 def is_standing_still(
     env: ManagerBasedRLEnv,
     min_head_height: float,
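The new term is a clamped linear ramp: 0 while the head is at or below `base_height`, 1 once it reaches `target_height`, and proportional in between. A quick sanity check with dummy values, using the same 0.15 m / 1.1 m pair that the reward config below passes in:

import torch

base_height, target_height = 0.15, 1.1
head_h = torch.tensor([0.10, 0.15, 0.625, 1.10, 1.50])   # hypothetical head heights in metres
reward = torch.clamp((head_h - base_height) / (target_height - base_height), 0.0, 1.0)
# -> tensor([0.0000, 0.0000, 0.5000, 1.0000, 1.0000])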
@@ -272,7 +284,14 @@ class T1GetUpRewardCfg:
         }
     )
 
-    # 4. Penalty terms
+    # 4. Guide the robot to "look up" and "lift its head"
+    head_lift = RewTerm(
+        func=linear_head_height_reward,
+        weight=15.0,
+        params={"target_height": 1.1, "base_height": 0.15}
+    )
+
+    # 5. Penalty terms
     undesired_contacts = RewTerm(
         func=mdp.undesired_contacts,
         weight=-2.0,
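For a sense of scale (an observation, not something asserted by the commit): Isaac Lab's reward manager typically scales each term by weight * value * dt per step, so with the ramp bounded in [0, 1] the new `head_lift` term contributes at most about 15.0 * dt per step, comfortably above the -2.0 contact and -1.0 vertical-velocity penalties that follow. Assuming a 50 Hz step (illustrative):

step_dt = 0.02                                   # assumption: 50 Hz control step, not from the commit
max_head_lift_per_step = 15.0 * 1.0 * step_dt    # 0.30
contact_penalty_per_step = -2.0 * 1.0 * step_dt  # -0.04 per step with an undesired contact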
@@ -283,14 +302,14 @@ class T1GetUpRewardCfg:
         }
     )
 
-    # 5. Suppress jumping: harshly penalize sudden upward velocity
+    # 6. Suppress jumping: harshly penalize sudden upward velocity
     root_vel_z_penalty = RewTerm(
         func=root_vel_z_l2_local,
         weight=-1.0,  # increase the negative weight
         params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
-    # 6. Suppress hang time (airtime penalty)
+    # 7. Suppress hang time (airtime penalty)
     feet_airtime = RewTerm(
         func=strict_feet_contact_reward,
         weight=-5.0,  # heavier weight: a single jump loses more reward than standing up gains