Change the reward and remove arm disturbance
This commit is contained in:
@@ -39,13 +39,13 @@ params:
|
|||||||
reward_shaper:
|
reward_shaper:
|
||||||
scale_value: 1.0
|
scale_value: 1.0
|
||||||
normalize_advantage: True
|
normalize_advantage: True
|
||||||
gamma: 0.96
|
gamma: 0.98
|
||||||
tau: 0.95
|
tau: 0.95
|
||||||
learning_rate: 3e-4
|
learning_rate: 5e-4
|
||||||
lr_schedule: adaptive
|
lr_schedule: adaptive
|
||||||
kl_threshold: 0.008
|
kl_threshold: 0.008
|
||||||
score_to_win: 20000
|
score_to_win: 20000
|
||||||
max_epochs: 200
|
max_epochs: 500
|
||||||
save_best_after: 50
|
save_best_after: 50
|
||||||
save_frequency: 100
|
save_frequency: 100
|
||||||
grad_norm: 0.5
|
grad_norm: 0.5
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ def universal_arm_support_reward(
|
|||||||
env: ManagerBasedRLEnv,
|
env: ManagerBasedRLEnv,
|
||||||
sensor_cfg: SceneEntityCfg,
|
sensor_cfg: SceneEntityCfg,
|
||||||
height_threshold: float = 0.60,
|
height_threshold: float = 0.60,
|
||||||
min_force: float = 2.0
|
min_force: float = 15.0
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
"""
|
"""
|
||||||
通用手臂支撑奖励:同时支持仰卧起坐支撑和俯卧撑起。
|
通用手臂支撑奖励:同时支持仰卧起坐支撑和俯卧撑起。
|
||||||
@@ -227,11 +227,11 @@ class T1GetUpRewardCfg:
|
|||||||
# 3. 手臂撑地奖:辅助脱离地面阶段
|
# 3. 手臂撑地奖:辅助脱离地面阶段
|
||||||
arm_push_support = RewTerm(
|
arm_push_support = RewTerm(
|
||||||
func=universal_arm_support_reward,
|
func=universal_arm_support_reward,
|
||||||
weight=20.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
|
weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
|
||||||
params={
|
params={
|
||||||
"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
|
"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
|
||||||
"height_threshold": 0.65, # encourage arm pushing until the torso rises to 0.65 m
|
"height_threshold": 0.65, # encourage arm pushing until the torso rises to 0.65 m
|
||||||
"min_force": 3.0 # 只要有 15N 的力就触发
|
"min_force": 8.0 # triggers as soon as the contact force reaches 8 N
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -251,24 +251,10 @@ class T1GetUpRewardCfg:
|
|||||||
# 6. 成功终极大奖
|
# 6. 成功终极大奖
|
||||||
is_success = RewTerm(
|
is_success = RewTerm(
|
||||||
func=lambda env, keys: env.termination_manager.get_term(keys).float(),
|
func=lambda env, keys: env.termination_manager.get_term(keys).float(),
|
||||||
weight=500.0,
|
weight=1000.0,
|
||||||
params={"keys": "standing_success"}
|
params={"keys": "standing_success"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 7. 手臂关节活跃度奖 (诱导摆动)
|
|
||||||
arm_movement_exploration = RewTerm(
|
|
||||||
func=mdp.joint_vel_l2,
|
|
||||||
weight=2, # 权重不要太高,防止变成“风扇”
|
|
||||||
params={"asset_cfg": SceneEntityCfg("robot",joint_names=["Left_Shoulder.*", "Left_Elbow.*", "Right_Shoulder.*", "Right_Elbow.*"])}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 8. 手臂位置多样性奖 (离开默认折叠姿态)
|
|
||||||
arm_deviation_bonus = RewTerm(
|
|
||||||
func=joint_deviation_l2,
|
|
||||||
weight=1,
|
|
||||||
params={"asset_cfg": SceneEntityCfg("robot",joint_names=["Left_Shoulder.*", "Left_Elbow.*", "Right_Shoulder.*", "Right_Elbow.*"])}
|
|
||||||
)
|
|
||||||
|
|
||||||
@configclass
|
@configclass
|
||||||
class T1GetUpTerminationsCfg:
|
class T1GetUpTerminationsCfg:
|
||||||
time_out = DoneTerm(func=mdp.time_out)
|
time_out = DoneTerm(func=mdp.time_out)
|
||||||
|
|||||||
Reference in New Issue
Block a user