From 2ae7210062b14658c6f3ce29ed7e499a4cb2678c Mon Sep 17 00:00:00 2001
From: ChenXi
Date: Fri, 20 Mar 2026 03:37:56 -0400
Subject: [PATCH] Amend for standing

---
NOTE(review): this copy was rebuilt from a whitespace-collapsed dump of the
patch. Leading indentation and blank context lines are reconstructed (blank
lines were placed so that every hunk matches its @@ line counts), so
regenerate the patch from the commit, or apply it with whitespace tolerance,
before relying on it. Comments on added lines are translated to English, and
strict_feet_contact_reward now indexes net_forces_w with sensor_cfg.body_ids
so that only the bodies selected through body_names are tested.

 rl_game/get_up/config/ppo_cfg.yaml  |  10 +--
 rl_game/get_up/config/t1_env_cfg.py | 127 ++++++++++++++++++++--------
 rl_game/get_up/env/t1_env.py        |   7 --
 3 files changed, 97 insertions(+), 47 deletions(-)

diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml
index e40d15e..a8c9243 100644
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -17,7 +17,7 @@ params:
           name: default
         sigma_init:
           name: const_initializer
-          val: 0
+          val: 0.8
         fixed_sigma: True
     mlp:
       units: [512, 256, 128]
@@ -39,7 +39,7 @@ params:
     reward_shaper:
       scale_value: 1.0
     normalize_advantage: True
-    gamma: 0.96
+    gamma: 0.99
     tau: 0.95
     learning_rate: 5e-4
     lr_schedule: adaptive
@@ -49,12 +49,12 @@ params:
     save_best_after: 50
     save_frequency: 100
     grad_norm: 1.0
-    entropy_coef: 0.02
+    entropy_coef: 0.05
     truncate_grads: True
     bounds_loss_coef: 0.001
     e_clip: 0.2
-    horizon_length: 32
-    minibatch_size: 16384
+    horizon_length: 64
+    minibatch_size: 8192
     mini_epochs: 4
     critic_coef: 2
     clip_value: True
\ No newline at end of file
diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py
index c71d7c5..6f7916f 100644
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -21,53 +21,76 @@ def standing_with_feet_reward(
     min_head_height: float,
     min_pelvis_height: float,
     sensor_cfg: SceneEntityCfg,
-    force_threshold: float = 30.0
+    force_threshold: float = 30.0,
+    max_v_z: float = 0.25
 ) -> torch.Tensor:
     """
-    【双高度条件奖励】:只有脚踩地,且头和躯干同时达到高度,才给予高度奖励。
+    Height reward that switches regime at a fixed pelvis height:
+    low pelvis  -> height score only
+    high pelvis -> height score gated by foot contact force and vertical speed
     """
-    # 1. 获取脚部触地力判定
-    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
-    foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
-    is_feet_on_ground = foot_forces_z > force_threshold
-
-    # 2. 获取头部和躯干索引并提取高度
+    # 1. Look up the "H2" and "Trunk" bodies and read their world heights
     head_idx, _ = env.scene["robot"].find_bodies("H2")
     pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
-
     current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
     current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
 
-    # 3. 计算高度达标度 (0.0 - 1.0)
-    head_reward = torch.clamp(current_head_h / min_head_height, max=1.2)
-    pelvis_reward = torch.clamp(current_pelvis_h / min_pelvis_height, max=1.0)
+    # 2. Base height score, each term capped at 1.0
+    head_score = torch.clamp(current_head_h / min_head_height, max=1.0)
+    pelvis_score = torch.clamp(current_pelvis_h / min_pelvis_height, max=1.0)
+    combined_height_score = (head_score + pelvis_score) / 2.0
 
-    # 综合高度奖励(取平均值)
-    combined_height_reward = (head_reward + pelvis_reward) / 2.0
+    # 3. Foot contact gate (1.0 when the summed vertical force exceeds the threshold)
+    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
+    foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
+    is_feet_on_ground = (foot_forces_z > force_threshold).float()
 
-    # 4. 逻辑门:脚不着地,奖励为 0;脚着地后,根据高度给分
-    return torch.where(is_feet_on_ground, combined_height_reward, torch.zeros_like(combined_height_reward))
+    # 4. Vertical-speed factor (discourages jumping); equals 1.0 while |v_z| <= max_v_z
+    root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
+    vel_penalty_factor = torch.exp(-4.0 * torch.clamp(torch.abs(root_vel_z) - max_v_z, min=0.0))
 
+    # --- Regime switch ---
+    # Transition height (e.g. pelvis at 0.4 m)
+    transition_h = 0.4
+
+    # Low pelvis: height score only, to induce upward motion
+    low_height_reward = combined_height_score
+
+    # High pelvis: height score * speed factor * feet-on-ground gate
+    high_height_reward = combined_height_score * vel_penalty_factor * is_feet_on_ground
+
+    return torch.where(current_pelvis_h < transition_h, low_height_reward, high_height_reward)
 
 def arm_push_up_reward(
     env: ManagerBasedRLEnv,
     sensor_cfg: SceneEntityCfg,
-    height_threshold: float = 0.6
+    height_threshold: float = 0.5
 ) -> torch.Tensor:
-    """手臂撑地奖励:辅助机器人从趴/躺状态利用手臂反作用力起身"""
+    """
+    Arm push-off reward plus linear trunk-height guidance.
+    Guides the robot from lying flat to pushing its trunk up with its arms.
+    """
+    # Read the contact sensor
     contact_sensor = env.scene.sensors.get(sensor_cfg.name)
     if contact_sensor is None:
         return torch.zeros(env.num_envs, device=env.device)
 
-    arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
-    max_arm_force = torch.max(arm_forces_z, dim=-1)[0]
-
-    # 当躯干还很低时,鼓励手撑地
+    # Current height of the "Trunk" body
     pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
     current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
 
-    pushing_reward = torch.clamp(max_arm_force, max=200.0) / 100.0
-    return torch.where(current_height < height_threshold, pushing_reward, torch.zeros_like(pushing_reward))
+    # 1. Arm force term: while the trunk is low, reward an upward reaction force on the arms
+    arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
+    max_arm_force = torch.max(arm_forces_z, dim=-1)[0]
+    push_reward = torch.clamp(max_arm_force / 50.0, max=1.0)  # force term capped at 1.0
+    # 2. Linear trunk-height guidance: below height_threshold, higher scores more,
+    # so the reward is no longer the same whether or not the robot moves
+    lifting_reward = torch.clamp(current_height / height_threshold, max=1.0)
+
+    # Both terms are paid only while the trunk is still below the threshold
+    return torch.where(current_height < height_threshold,
+                       push_reward + lifting_reward,
+                       torch.zeros_like(push_reward))
 
 def is_standing_still(
     env: ManagerBasedRLEnv,
@@ -134,6 +157,20 @@ def root_vel_z_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> to
     vel_z = env.scene[asset_cfg.name].data.root_lin_vel_w[:, 2]
     return torch.square(torch.clamp(vel_z, min=0.0))
 
+def joint_pos_rel_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    # Joint offsets from the default pose, (num_envs, num_joints)
+    rel_pos = mdp.joint_pos_rel(env, asset_cfg)
+    # Sum of squares (L2)
+    return torch.sum(torch.square(rel_pos), dim=-1)
+
+def strict_feet_contact_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
+    """Return 1.0 for each env where any selected body carries 5 N or less of vertical force, else 0.0."""
+    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
+    # Index with sensor_cfg.body_ids: the sensor may cover every robot link, but only the selected feet count
+    foot_forces_z = contact_sensor.data.net_forces_w[:, sensor_cfg.body_ids, 2]
+    all_feet_cond = torch.min(foot_forces_z, dim=-1)[0] > 5.0  # every selected body needs more than 5 N
+
+    return (~all_feet_cond).float()  # 1 marks a violation
 
 # --- 2. 配置类 ---
 
@@ -169,8 +206,8 @@ class T1EventCfg:
         params={
             "asset_cfg": SceneEntityCfg("robot"),
             "pose_range": {
-                "roll": (0, 0),#(-1.57, 1.57),
-                "pitch": (-1.57, 1.57),#(-1.57, 1.57),
+                "roll": (-0.2, 0.2),#(-1.57, 1.57),
+                "pitch": (-1.6, -1.4),#(-1.57, 1.57),
                 "yaw": (0, 0),#(-3.14, 3.14),
                 "x": (0.0, 0.0),
                 "y": (0.0, 0.0),
@@ -231,19 +268,39 @@ class T1GetUpRewardCfg:
     # 5. 抑制跳跃:严厉惩罚向上窜的速度
     root_vel_z_penalty = RewTerm(
         func=root_vel_z_l2_local,
-        weight=-10.0, # 增大负权重
+        weight=-50.0,  # larger negative weight
         params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
     # 6. 抑制滞空 (Airtime Penalty)
-    # 如果脚离开地面,按时间扣分
     feet_airtime = RewTerm(
-        func=feet_airtime_penalty_local,
-        weight=-10.0,
-        params={
-            "sensor_cfg": SceneEntityCfg("feet_contact_sensor"),
-            "threshold": 0.1, # 超过 0.2s 离地就开始扣分
-        }
+        func=strict_feet_contact_reward,
+        weight=-20.0,  # heavier weight, so that a hop costs more than standing up earns
+        params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"])}
+    )
+
+    joint_vel_penalty = RewTerm(
+        func=mdp.joint_vel_l2,
+        weight=-0.5,  # penalise fast joint motion
+        params={"asset_cfg": SceneEntityCfg("robot")}
+    )
+
+    action_rate = RewTerm(
+        func=mdp.action_rate_l2,
+        weight=-0.5,  # penalise abrupt action changes for smoother, less explosive motion
+    )
+
+    # Penalise the norm of the trunk angular velocity
+    base_ang_vel_penalty = RewTerm(
+        func=lambda env, asset_cfg: torch.norm(mdp.base_ang_vel(env, asset_cfg), dim=-1),
+        weight=-0.1,
+        params={"asset_cfg": SceneEntityCfg("robot")}
+    )
+
+    joint_deviation = RewTerm(
+        func=joint_pos_rel_l2_local,
+        weight=0.1,  # keep this small; it is only meant to get the robot moving
+        params={"asset_cfg": SceneEntityCfg("robot")}
     )
 
     # 7. 成功终极大奖
@@ -259,7 +316,7 @@ class T1GetUpTerminationsCfg:
     time_out = DoneTerm(func=mdp.time_out)
 
     # 失败判定:躯干倾斜超过 45 度重置
-    base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 0.785})
+    #base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 0.785})
 
     # 成功判定:双高度 + 稳定
     standing_success = DoneTerm(
diff --git a/rl_game/get_up/env/t1_env.py b/rl_game/get_up/env/t1_env.py
index 7825549..7e80819 100644
--- a/rl_game/get_up/env/t1_env.py
+++ b/rl_game/get_up/env/t1_env.py
@@ -54,13 +54,6 @@ class T1SceneCfg(InteractiveSceneCfg):
         },
     )
 
-
-    feet_contact_sensor = ContactSensorCfg(
-        prim_path="{ENV_REGEX_NS}/Robot/.*_foot_link", # 使用正则匹配所有脚部 link
-        update_period=0.0, # 随物理步长更新
-        history_length=3
-    )
-
     contact_sensor = ContactSensorCfg(
         prim_path="{ENV_REGEX_NS}/Robot/.*",
         update_period=0.0,