From 72a22bd78a63a7911458de07bf7c80be36a3ebf7 Mon Sep 17 00:00:00 2001
From: ChenXi
Date: Sat, 21 Mar 2026 08:38:17 -0400
Subject: [PATCH] change arm to push the ground reward function

---
 rl_game/get_up/config/ppo_cfg.yaml  |  2 +-
 rl_game/get_up/config/t1_env_cfg.py | 61 +++++++++++++++++------------
 2 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml
index d896088..a440bf1 100644
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -45,7 +45,7 @@ params:
     lr_schedule: adaptive
     kl_threshold: 0.008
     score_to_win: 20000
-    max_epochs: 500000
+    max_epochs: 1000000
     save_best_after: 50
     save_frequency: 100
     grad_norm: 0.5
diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py
index 5f13245..ab00a58 100644
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -48,47 +48,54 @@ def standing_with_feet_reward(
 
     return combined_reward
 
+
 def arm_push_up_reward(
     env: ManagerBasedRLEnv,
     sensor_cfg: SceneEntityCfg,
-    height_threshold: float = 0.5,
-    min_force: float = 20.0
+    height_threshold: float = 0.6,
+    min_force: float = 2.0  # 大幅降低门槛:只要有 2N 的力就说明碰到了
 ) -> torch.Tensor:
-    """
-    强化版手臂撑地奖励:
-    1. 鼓励手臂产生超过阈值的垂直反作用力。
-    2. 当手臂用力且躯干有向上速度时,给予额外加成。
-    """
-    # 获取手臂传感器数据
     contact_sensor = env.scene.sensors.get(sensor_cfg.name)
     if contact_sensor is None:
         return torch.zeros(env.num_envs, device=env.device)
 
-    # 1. 获取手臂 Z 轴受力 (取所有手臂 Body 的合力或最大力)
+    # 1. 获取手臂受力
+    # 使用 net_forces_w 的范数或 Z 分量
     arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
     max_arm_force = torch.max(arm_forces_z, dim=-1)[0]
 
-    # 归一化受力奖励:在 20N 到 100N 之间线性增长
-    force_reward = torch.clamp((max_arm_force - min_force) / 80.0, min=0.0, max=1.0)
+    # 2. 核心修改:将奖励分为“接触奖”和“撑地奖”
+    # 接触奖:只要碰到了就给 0.2 的基础分
+    is_contact = (max_arm_force > 0.1).float()
 
-    # 2. 获取躯干高度和垂直速度
+    # 撑地奖:力在 2N 到 50N 之间线性增长 (50N 对于支撑足够了)
+    force_reward = torch.clamp((max_arm_force - min_force) / 48.0, min=0.0, max=1.0)
+
+    # 组合:有接触就有基础分,力越大加分越多
+    total_force_score = is_contact * 0.2 + force_reward * 0.8
+
+    # 3. 协同奖励:躯干高度和速度
     pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
     current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
     root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
 
-    # 3. 协同奖励:当手臂在用力推,且躯干正在上升时,给高分
-    # 只有在高度低于阈值(还在撑起阶段)时生效
+    # 只要在撑,且躯干在往上走,就大幅加成
     pushing_up_bonus = torch.where(
-        (max_arm_force > min_force) & (root_vel_z > 0.05),
-        force_reward * (1.0 + root_vel_z * 2.0),  # 速度越快奖励越高
-        force_reward
+        (max_arm_force > min_force) & (root_vel_z > 0.02),
+        total_force_score * (1.0 + torch.clamp(root_vel_z * 5.0, max=2.0)),
+        total_force_score
     )
 
-    # 只有在躯干较低时才发放此奖励
+    # 4. 这里的门槛要严格:一旦站得比较高了(比如 0.6m),就停止对手臂的奖励,
+    # 强迫它把重心转移到腿部
     return torch.where(current_height < height_threshold, pushing_up_bonus, torch.zeros_like(pushing_up_bonus))
 
 
+def torso_pitch_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    proj_gravity = env.scene[asset_cfg.name].data.projected_gravity_b
+    return torch.square(proj_gravity[:, 0])
+
+
 def is_standing_still(
     env: ManagerBasedRLEnv,
     min_head_height: float,
@@ -185,7 +192,7 @@ class T1ActionCfg:
 @configclass
 class T1GetUpRewardCfg:
     # 1. 姿态基础奖 (引导身体变正)
-    upright = RewTerm(func=mdp.flat_orientation_l2, weight=10.0)
+    upright = RewTerm(func=mdp.flat_orientation_l2, weight=30.0)
 
     # 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地
     height_with_feet = RewTerm(
@@ -203,20 +210,26 @@ class T1GetUpRewardCfg:
     # 3. 手臂撑地奖:辅助脱离地面阶段
     arm_push_support = RewTerm(
         func=arm_push_up_reward,
-        weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
+        weight=45.0, # 显著增加权重(从 15.0 提到 45.0),让它成为起步的关键
         params={
             "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
             "height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力
         }
     )
 
-    # 4. 关节限位惩罚 (新增:防止关节撞死导致数值问题)
     joint_limits = RewTerm(
         func=mdp.joint_pos_limits,
         weight=-1.0,
         params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
+    # 4. 新增:躯干仰角奖 (核心诱导)
+    torso_pitch = RewTerm(
+        func=torso_pitch_reward, # 或者使用 orientation 相关的项
+        weight=15.0,
+        params={"asset_cfg": SceneEntityCfg("robot")}
+    )
+
     # 5. 成功终极大奖
     is_success = RewTerm(
         func=lambda env, keys: env.termination_manager.get_term(keys).float(),
@@ -240,9 +253,9 @@ class T1GetUpTerminationsCfg:
         params={
             "min_head_height": 1.05,
             "min_pelvis_height": 0.75,
-            "max_angle_error": 0.15,
-            "standing_time": 0.8,
-            "velocity_threshold": 0.15
+            "max_angle_error": 0.3,
+            "standing_time": 0.2,
+            "velocity_threshold": 0.5
         }
     )