Amend arm reward to make the reward harder to obtain
This commit is contained in:
@@ -52,45 +52,37 @@ def standing_with_feet_reward(
|
|||||||
def arm_push_up_reward(
|
def arm_push_up_reward(
|
||||||
env: ManagerBasedRLEnv,
|
env: ManagerBasedRLEnv,
|
||||||
sensor_cfg: SceneEntityCfg,
|
sensor_cfg: SceneEntityCfg,
|
||||||
height_threshold: float = 0.6,
|
height_threshold: float = 0.55,
|
||||||
min_force: float = 2.0 # 大幅降低门槛:只要有 2N 的力就说明碰到了
|
min_force: float = 15.0
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
|
"""带几何限制的高标准手臂支撑奖励"""
|
||||||
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
|
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
|
||||||
if contact_sensor is None:
|
if contact_sensor is None:
|
||||||
return torch.zeros(env.num_envs, device=env.device)
|
return torch.zeros(env.num_envs, device=env.device)
|
||||||
|
|
||||||
# 1. 获取手臂受力
|
# 1. 获取受力数据
|
||||||
# 使用 net_forces_w 的范数或 Z 分量
|
|
||||||
arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
|
arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
|
||||||
max_arm_force = torch.max(arm_forces_z, dim=-1)[0]
|
avg_arm_force = torch.mean(arm_forces_z, dim=-1)
|
||||||
|
|
||||||
# 2. 核心修改:将奖励分为“接触奖”和“撑地奖”
|
# 2. 几何限制:手臂必须在躯干下方 (修复了之前的 AttributeError)
|
||||||
# 接触奖:只要碰到了就给 0.2 的基础分
|
arm_body_indices, _ = env.scene["robot"].find_bodies(sensor_cfg.body_names)
|
||||||
is_contact = (max_arm_force > 0.1).float()
|
|
||||||
|
|
||||||
# 撑地奖:力在 2N 到 50N 之间线性增长 (50N 对于支撑足够了)
|
|
||||||
force_reward = torch.clamp((max_arm_force - min_force) / 48.0, min=0.0, max=1.0)
|
|
||||||
|
|
||||||
# 组合:有接触就有基础分,力越大加分越多
|
|
||||||
total_force_score = is_contact * 0.2 + force_reward * 0.8
|
|
||||||
|
|
||||||
# 3. 协同奖励:躯干高度和速度
|
|
||||||
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
|
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
|
||||||
current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
|
pelvis_pos_z = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
|
||||||
|
arm_pos_z = env.scene["robot"].data.body_state_w[:, arm_body_indices, 2]
|
||||||
|
|
||||||
|
# 手臂是否全部低于盆骨
|
||||||
|
is_below_pelvis = torch.all(arm_pos_z < pelvis_pos_z.unsqueeze(1), dim=-1).float()
|
||||||
|
|
||||||
|
# 3. 计算奖励
|
||||||
|
force_reward = torch.clamp((avg_arm_force - min_force) / 45.0, min=0.0, max=1.0)
|
||||||
root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
|
root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
|
||||||
|
velocity_factor = torch.clamp(root_vel_z * 3.0, min=0.0, max=1.5)
|
||||||
|
|
||||||
# 只要在撑,且躯干在往上走,就大幅加成
|
total_reward = force_reward * is_below_pelvis * (1.0 + velocity_factor)
|
||||||
pushing_up_bonus = torch.where(
|
|
||||||
(max_arm_force > min_force) & (root_vel_z > 0.02),
|
|
||||||
total_force_score * (1.0 + torch.clamp(root_vel_z * 5.0, max=2.0)),
|
|
||||||
total_force_score
|
|
||||||
)
|
|
||||||
|
|
||||||
# 4. 这里的门槛要严格:一旦站得比较高了(比如 0.6m),就停止对手臂的奖励,
|
# 高度越高,手臂奖励越低 (强迫切换到腿)
|
||||||
# 强迫它把重心转移到腿部
|
height_fade = torch.clamp((height_threshold - pelvis_pos_z) / 0.1, min=0.0, max=1.0)
|
||||||
return torch.where(current_height < height_threshold,
|
return total_reward * height_fade
|
||||||
pushing_up_bonus,
|
|
||||||
torch.zeros_like(pushing_up_bonus))
|
|
||||||
|
|
||||||
def torso_pitch_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
def torso_pitch_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
|
||||||
proj_gravity = env.scene[asset_cfg.name].data.projected_gravity_b
|
proj_gravity = env.scene[asset_cfg.name].data.projected_gravity_b
|
||||||
@@ -210,7 +202,7 @@ class T1GetUpRewardCfg:
|
|||||||
# 3. 手臂撑地奖:辅助脱离地面阶段
|
# 3. 手臂撑地奖:辅助脱离地面阶段
|
||||||
arm_push_support = RewTerm(
|
arm_push_support = RewTerm(
|
||||||
func=arm_push_up_reward,
|
func=arm_push_up_reward,
|
||||||
weight=45.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
|
weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
|
||||||
params={
|
params={
|
||||||
"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
|
"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
|
||||||
"height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力
|
"height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力
|
||||||
@@ -235,7 +227,7 @@ class T1GetUpRewardCfg:
|
|||||||
# 5. 成功终极大奖
|
# 5. 成功终极大奖
|
||||||
is_success = RewTerm(
|
is_success = RewTerm(
|
||||||
func=lambda env, keys: env.termination_manager.get_term(keys).float(),
|
func=lambda env, keys: env.termination_manager.get_term(keys).float(),
|
||||||
weight=50.0,
|
weight=300.0,
|
||||||
params={"keys": "standing_success"}
|
params={"keys": "standing_success"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user