Change the reward; remove arm disturbance

This commit is contained in:
2026-03-22 02:26:16 -04:00
parent f7c8e6e325
commit 2e2d68a933
2 changed files with 7 additions and 21 deletions

View File

@@ -39,13 +39,13 @@ params:
reward_shaper: reward_shaper:
scale_value: 1.0 scale_value: 1.0
normalize_advantage: True normalize_advantage: True
gamma: 0.96 gamma: 0.98
tau: 0.95 tau: 0.95
learning_rate: 3e-4 learning_rate: 5e-4
lr_schedule: adaptive lr_schedule: adaptive
kl_threshold: 0.008 kl_threshold: 0.008
score_to_win: 20000 score_to_win: 20000
max_epochs: 200 max_epochs: 500
save_best_after: 50 save_best_after: 50
save_frequency: 100 save_frequency: 100
grad_norm: 0.5 grad_norm: 0.5

View File

@@ -53,7 +53,7 @@ def universal_arm_support_reward(
env: ManagerBasedRLEnv, env: ManagerBasedRLEnv,
sensor_cfg: SceneEntityCfg, sensor_cfg: SceneEntityCfg,
height_threshold: float = 0.60, height_threshold: float = 0.60,
min_force: float = 2.0 min_force: float = 15.0
) -> torch.Tensor: ) -> torch.Tensor:
""" """
通用手臂支撑奖励:同时支持仰卧起坐支撑和俯卧撑起。 通用手臂支撑奖励:同时支持仰卧起坐支撑和俯卧撑起。
@@ -227,11 +227,11 @@ class T1GetUpRewardCfg:
# 3. 手臂撑地奖:辅助脱离地面阶段 # 3. 手臂撑地奖:辅助脱离地面阶段
arm_push_support = RewTerm( arm_push_support = RewTerm(
func=universal_arm_support_reward, func=universal_arm_support_reward,
weight=20.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键 weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
params={ params={
"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]), "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
"height_threshold": 0.65, # 躯干升到 0.65m 前都鼓励手臂用力 "height_threshold": 0.65, # 躯干升到 0.65m 前都鼓励手臂用力
"min_force": 3.0 # 接触力达到 min_force(单位 N)即触发 "min_force": 8.0 # 接触力达到 min_force(单位 N)即触发
} }
) )
@@ -251,24 +251,10 @@ class T1GetUpRewardCfg:
# 6. 成功终极大奖 # 6. 成功终极大奖
is_success = RewTerm( is_success = RewTerm(
func=lambda env, keys: env.termination_manager.get_term(keys).float(), func=lambda env, keys: env.termination_manager.get_term(keys).float(),
weight=500.0, weight=1000.0,
params={"keys": "standing_success"} params={"keys": "standing_success"}
) )
# 7. 手臂关节活跃度奖 (诱导摆动)
arm_movement_exploration = RewTerm(
func=mdp.joint_vel_l2,
weight=2, # 权重不要太高,防止变成“风扇”
params={"asset_cfg": SceneEntityCfg("robot",joint_names=["Left_Shoulder.*", "Left_Elbow.*", "Right_Shoulder.*", "Right_Elbow.*"])}
)
# 8. 手臂位置多样性奖 (离开默认折叠姿态)
arm_deviation_bonus = RewTerm(
func=joint_deviation_l2,
weight=1,
params={"asset_cfg": SceneEntityCfg("robot",joint_names=["Left_Shoulder.*", "Left_Elbow.*", "Right_Shoulder.*", "Right_Elbow.*"])}
)
@configclass @configclass
class T1GetUpTerminationsCfg: class T1GetUpTerminationsCfg:
time_out = DoneTerm(func=mdp.time_out) time_out = DoneTerm(func=mdp.time_out)