From 4bc205399c0e23f375b2bd1b9320bd80defa73d1 Mon Sep 17 00:00:00 2001
From: ChenXi
Date: Mon, 23 Mar 2026 09:06:36 -0400
Subject: [PATCH] reward modification and change the get_up logic

---
 rl_game/get_up/config/t1_env_cfg.py | 198 ++++++++++++++++------------
 1 file changed, 114 insertions(+), 84 deletions(-)

diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py
index 6a09adc..85921f1 100644
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -1,7 +1,7 @@
-import random
-import numpy
-import numpy as np
 import torch
+import random
+import numpy as np
+import isaaclab.envs.mdp as mdp
 from isaaclab.assets import ArticulationCfg
 from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
 from isaaclab.managers import ObservationGroupCfg as ObsGroup
@@ -13,7 +13,6 @@ from isaaclab.envs.mdp import JointPositionActionCfg
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.utils import configclass
 from rl_game.get_up.env.t1_env import T1SceneCfg
-import isaaclab.envs.mdp as mdp
 
 
 # --- 1. 自定义 MDP 逻辑函数 ---
@@ -52,72 +51,70 @@ def standing_with_feet_reward(
     return height_reward * (0.5 + 0.5 * force_weight * vel_penalty)
 
 
-def arm_tuck_incremental_reward(
-    env: ManagerBasedRLEnv,
-    pitch_threshold: float = 1.4,
-    shaping_weight: float = 0.2
-) -> torch.Tensor:
-    """增量式收手奖励:鼓励向弯曲方向运动,达到阈值给大奖"""
-    joint_names = ["Left_Elbow_Pitch", "Right_Elbow_Pitch"]
-    joint_ids, _ = env.scene["robot"].find_joints(joint_names)
-
-    elbow_pos = env.scene["robot"].data.joint_pos[:, joint_ids]
-    elbow_vel = env.scene["robot"].data.joint_vel[:, joint_ids]
-
-    # 1. 速度引导:只要在收缩(速度为正)就给小奖,伸直则惩罚
-    avg_vel = torch.mean(elbow_vel, dim=-1)
-    shaping_reward = torch.tanh(avg_vel) * shaping_weight
-
-    # 2. 阈值触发:一旦收缩到位,给稳定的静态奖
-    is_tucked = torch.all(elbow_pos > pitch_threshold, dim=-1).float()
-    goal_bonus = is_tucked * 1.5
-
-    return shaping_reward + goal_bonus
-
-
 def dynamic_getup_strategy_reward(env: ManagerBasedRLEnv) -> torch.Tensor:
     """
-    状态机奖励切换逻辑:
-    - 仰卧时:重点是 翻身 + 缩手。
-    - 俯卧时:重点是 撑地起立。
+    全姿态对称起立策略:
+    1. 核心蜷缩 (Spring Loading):无论仰卧还是俯卧,只要高度低,就必须强制收腿。
+    2. 仰卧支撑 (Back-Pushing):在仰卧状态下,鼓励手臂向后发力并抬高盆骨。
+    3. 协同爆发 (Explosive Jump):蜷缩状态下产生的向上动量获得最高倍率奖励。
     """
-    # 获取重力投影:Z轴分量 > 0 表示仰卧
-    gravity_z = env.scene["robot"].data.projected_gravity_b[:, 2]
+    # --- 1. 获取物理状态 ---
+    gravity_z = env.scene["robot"].data.projected_gravity_b[:, 2] # 1:仰卧, -1:俯卧
+    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
+    curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
+    root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
 
-    # 状态掩码
-    is_on_back = (gravity_z > 0.2).float()
-    is_on_belly = (gravity_z < -0.2).float()
-    is_transition = (1.0 - is_on_back - is_on_belly)
+    # 关节索引:11,12髋, 17,18膝 (确保与T1模型一致)
+    knee_joints = [17, 18]
+    hip_pitch_joints = [11, 12]
+    joint_pos = env.scene["robot"].data.joint_pos
 
-    # 1. 翻身势能:引导 gravity_z 向 -1.0 靠拢
-    flip_shaping = torch.clamp(-gravity_z, min=-1.0, max=1.0)
+    # --- 2. 核心蜷缩评分 (Crouch Score) ---
+    # 无论仰俯,蜷缩是起立的绝对前提。目标是让脚尽可能靠近质心。
+    # 提高膝盖弯曲目标 (1.5 rad),引导更深度的折叠
+    knee_flex_err = torch.abs(joint_pos[:, knee_joints] - 1.5).sum(dim=-1)
+    hip_flex_err = torch.abs(joint_pos[:, hip_pitch_joints] - 1.2).sum(dim=-1)
+    crouch_score = torch.exp(-(knee_flex_err + hip_flex_err) * 0.6)
 
-    # 2. 缩手动作
-    tuck_rew = arm_tuck_incremental_reward(env)
+    # 基础蜷缩奖励 (Spring Base) - 权重加大
+    crouch_trigger = torch.clamp(0.6 - curr_pelvis_h, min=0.0)
+    base_crouch_reward = crouch_trigger * crouch_score * 40.0
 
-    # 3. 撑地动作 (复用原逻辑,但去掉内部的高度衰减,统一由状态机控制)
+    # --- 3. 支撑力奖励 (Support Force) ---
+    push_reward = torch.zeros_like(curr_pelvis_h)
     contact_sensor = env.scene.sensors.get("contact_sensor")
-    max_arm_force = torch.zeros(env.num_envs, device=env.device)
     if contact_sensor is not None:
-        # 假设手臂/手部 link 的受力
+        # 监测非足部Link(手、臂)的受力
+        # 无论正反,只要手能提供垂直向上的推力,就是好手
         arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
-        max_arm_force = torch.max(arm_forces_z, dim=-1)[0]
+        push_reward = torch.tanh(torch.max(arm_forces_z, dim=-1)[0] / 30.0)
 
-    push_rew = torch.tanh(torch.clamp(max_arm_force - 15.0, min=0.0) / 40.0)
+    # --- 4. 姿态特定引导 (Orientation-Neutral) ---
+    is_back = torch.clamp(gravity_z, min=0.0) # 仰卧程度
+    is_belly = torch.clamp(-gravity_z, min=0.0) # 俯卧程度
 
-    # --- 权重动态合成 ---
-    # 仰卧区:翻身(8.0) + 缩手(4.0)
-    back_strategy = is_on_back * (8.0 * flip_shaping + 4.0 * tuck_rew)
+    # A. 仰卧直接起立逻辑:
+    # 在仰卧时,如果能把盆骨撑起来 (curr_pelvis_h 增加),给予重奖
+    # 配合crouch_score,鼓励“收腿-撑地-挺髋”的动作链
+    back_lift_reward = is_back * torch.clamp(curr_pelvis_h - 0.15, min=0.0) * crouch_score * 50.0
 
-    # 俯卧区:撑地(25.0) + 缩手维持(1.0)
-    # 这里撑地权重远高于翻身,确保机器人更愿意待在俯卧区尝试站立
-    belly_strategy = is_on_belly * (25.0 * push_rew + 1.0 * tuck_rew)
+    # B. 俯卧/翻身辅助逻辑 (保留一定的翻身倾向,但不再是唯一路径)
+    flip_reward = is_back * (1.0 - gravity_z) * 5.0 # 权重降低,仅作为备选
 
-    # 过渡区
-    trans_strategy = is_transition * (4.0 * flip_shaping + 10.0 * push_rew + 2.0 * tuck_rew)
+    # --- 5. 最终爆发项 (The Jump) ---
+    # 核心公式:蜷缩程度 * 向上速度 * 支撑力感应
+    # 这是一个通用的“起跳”奖励,无论正反面,只要满足“缩得紧、跳得快、手有撑”,奖励就爆炸
+    explosion_reward = crouch_score * torch.clamp(root_vel_z, min=0.0) * (0.5 + 0.5 * push_reward) * 80.0
 
-    return back_strategy + belly_strategy + trans_strategy
+    # --- 6. 汇总 ---
+    total_reward = (
+        base_crouch_reward +      # 必须缩腿
+        back_lift_reward +        # 仰卧挺髋
+        flip_reward +             # 翻身尝试
+        explosion_reward          # 终极爆发
+    )
+    return total_reward
 
 
 def is_standing_still(
     env: ManagerBasedRLEnv,
@@ -127,7 +124,6 @@ def is_standing_still(
     standing_time: float,
     velocity_threshold: float = 0.15
 ) -> torch.Tensor:
-    """判定逻辑:双高度达标 + 躯干垂直 + 全身静止"""
     head_idx, _ = env.scene["robot"].find_bodies("H2")
     pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
 
@@ -193,7 +189,7 @@
             "asset_cfg": SceneEntityCfg("robot"),
             "pose_range": {
                 "roll": (-1.57, 1.57),
-                "pitch": tuple(numpy.array([1.4, 1.6], dtype=np.float32) * random.choice([-1 , 1])), # 仰卧/俯卧
+                "pitch": random.choice([(-1.6, -1.4), (1.4, 1.6)]), # 仰卧/俯卧,保证 (min, max) 次序有效
                 "yaw": (-3.14, 3.14),
                 "x": (0.0, 0.0),
                 "y": (0.0, 0.0),
@@ -207,50 +203,88 @@
 @configclass
 class T1ActionCfg:
-    joint_pos = JointPositionActionCfg(
-        asset_name="robot", joint_names=T1_JOINT_NAMES, scale=0.5, use_default_offset=True
+    # 拆分动作组以防止抽搐。由于不强制规定动作,我们可以给各个部位较为均衡的探索范围。
+    arm_action = JointPositionActionCfg(
+        asset_name="robot",
+        joint_names=[
+            'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
+            'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw'
+        ],
+        scale=1.2, # 给了手臂相对充裕的自由度去摸索
+        use_default_offset=True
+    )
+
+    torso_action = JointPositionActionCfg(
+        asset_name="robot",
+        joint_names=['Waist', 'AAHead_yaw', 'Head_pitch'],
+        scale=0.8,
+        use_default_offset=True
+    )
+
+    leg_action = JointPositionActionCfg(
+        asset_name="robot",
+        joint_names=[
+            'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
+            'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
+            'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
+        ],
+        scale=0.6,
+        use_default_offset=True
     )
 
 
 @configclass
 class T1GetUpRewardCfg:
-    # --- 1. 动态策略整合奖励 (包含了翻身、缩手、撑地的逻辑切换) ---
-    adaptive_strategy = RewTerm(
+    # 1. 核心阶段性引导 (翻身 -> 蜷缩 -> 支撑)
+    dynamic_strategy = RewTerm(
         func=dynamic_getup_strategy_reward,
-        weight=1.0 # 内部已经有细分权重
+        weight=1.5
     )
 
-    # --- 2. 核心高度目标 (维持最高优先级) ---
+    # 2. 站立质量奖励 (强化双脚受力)
     height_with_feet = RewTerm(
         func=standing_with_feet_reward,
-        weight=15.0,
+        weight=40.0, # 大权重
         params={
             "min_head_height": 1.1,
             "min_pelvis_height": 0.7,
             "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
-            "force_threshold": 30.0,
-            "max_v_z": 0.3
+            "force_threshold": 40.0, # 必须达到一定压力,防止脚尖点地作弊
+            "max_v_z": 0.2
         }
     )
 
-    # --- 3. 辅助约束与惩罚 ---
-    upright = RewTerm(func=mdp.flat_orientation_l2, weight=1.0)
-    joint_limits = RewTerm(func=mdp.joint_pos_limits, weight=-20.0, params={"asset_cfg": SceneEntityCfg("robot")})
-    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
-
-    # --- 4. 成功奖励 ---
-    is_success_bonus = RewTerm(
-        func=is_standing_still,
-        weight=1000.0,
+    # 3. 惩罚项:防止钻空子
+    # 严厉惩罚:如果躯干(Trunk)或头(H2)直接接触地面,扣大分
+    body_contact_penalty = RewTerm(
+        func=mdp.contact_forces,
+        weight=-20.0,
         params={
-            "min_head_height": 1.05,
-            "min_pelvis_height": 0.75,
-            "max_angle_error": 0.3,
-            "standing_time": 0.2,
-            "velocity_threshold": 0.5
+            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk", "H2"]),
+            "threshold": 1.0
         }
     )
 
+    # 4. 关节功耗惩罚 (防止高频抽搐)
+    action_rate = RewTerm(
+        func=mdp.action_rate_l2,
+        weight=-0.01
+    )
+
+    # 5. 成功维持奖励
+    is_success_maintain = RewTerm(
+        func=is_standing_still,
+        weight=1000.0, # 巨大的成功奖励
+        params={
+            "min_head_height": 1.08,
+            "min_pelvis_height": 0.72,
+            "max_angle_error": 0.2,
+            "standing_time": 0.4, # 必须站稳 0.4s
+            "velocity_threshold": 0.3
+        }
+    )
+
+
 @configclass
 class T1GetUpTerminationsCfg:
     time_out = DoneTerm(func=mdp.time_out)
@@ -270,15 +304,11 @@
 class T1EnvCfg(ManagerBasedRLEnvCfg):
     scene = T1SceneCfg(num_envs=8192, env_spacing=2.5)
 
-    def __post_init__(self):
-        super().__post_init__()
-        self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)
-
     observations = T1ObservationCfg()
     rewards = T1GetUpRewardCfg()
     terminations = T1GetUpTerminationsCfg()
     events = T1EventCfg()
     actions = T1ActionCfg()
 
-    episode_length_s = 6.0
+    episode_length_s = 10.0
     decimation = 4
\ No newline at end of file