diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index 6f7916f..ac5d155 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -10,6 +10,7 @@ from isaaclab.envs.mdp import JointPositionActionCfg from isaaclab.managers import SceneEntityCfg from isaaclab.utils import configclass from rl_game.get_up.env.t1_env import T1SceneCfg +import isaaclab.utils.math as math_utils import isaaclab.envs.mdp as mdp @@ -61,36 +62,47 @@ def standing_with_feet_reward( return torch.where(current_pelvis_h < transition_h, low_height_reward, high_height_reward) + def arm_push_up_reward( env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, - height_threshold: float = 0.5 + height_threshold: float = 0.5, + min_force: float = 20.0 ) -> torch.Tensor: """ - 手臂撑地奖励 + 躯干高度线性引导。 - 引导机器人从完全平躺状态通过手臂受力将躯干撑起。 + 强化版手臂撑地奖励: + 1. 鼓励手臂产生超过阈值的垂直反作用力。 + 2. 当手臂用力且躯干有向上速度时,给予额外加成。 """ - # 获取传感器受力 + # 获取手臂传感器数据 contact_sensor = env.scene.sensors.get(sensor_cfg.name) - if contact_sensor is None: return torch.zeros(env.num_envs, device=env.device) + if contact_sensor is None: + return torch.zeros(env.num_envs, device=env.device) - # 获取躯干(Pelvis/Trunk)当前高度 - pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk") - current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2] - - # 1. 手臂受力奖:只有在躯干较低时,鼓励手臂产生向上的反作用力 + # 1. 获取手臂 Z 轴受力 (取所有手臂 Body 的合力或最大力) arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2] max_arm_force = torch.max(arm_forces_z, dim=-1)[0] - push_reward = torch.clamp(max_arm_force / 50.0, max=1.0) # 归一化受力奖励 - # 2. 躯干高度线性引导:只要在 height_threshold 以下,越高分越高 - # 这解决了“动不动奖励都一样”的问题 - lifting_reward = torch.clamp(current_height / height_threshold, max=1.0) + # 归一化受力奖励:在 20N 到 100N 之间线性增长 + force_reward = torch.clamp((max_arm_force - min_force) / 80.0, min=0.0, max=1.0) - # 只有在还没站起来(低于阈值)的时候,才给这两个组合奖励 + # 2. 获取躯干高度和垂直速度 + pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk") + current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2] + root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2] + + # 3. 协同奖励:当手臂在用力推,且躯干正在上升时,给高分 + # 只有在高度低于阈值(还在撑起阶段)时生效 + pushing_up_bonus = torch.where( + (max_arm_force > min_force) & (root_vel_z > 0.05), + force_reward * (1.0 + root_vel_z * 2.0), # 速度越快奖励越高 + force_reward + ) + + # 只有在躯干较低时才发放此奖励 return torch.where(current_height < height_threshold, - push_reward + lifting_reward, - torch.zeros_like(push_reward)) + pushing_up_bonus, + torch.zeros_like(pushing_up_bonus)) def is_standing_still( env: ManagerBasedRLEnv, @@ -172,6 +184,51 @@ def strict_feet_contact_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCf return (~all_feet_cond).float() # 返回1表示违规 + +def reset_root_state_symmetric( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + asset_cfg: SceneEntityCfg, + pose_range: dict, + velocity_range: dict +): + """随机对称采样:让机器人随机以 趴/躺/左倾/右倾 及其组合姿态重置""" + robot = env.scene[asset_cfg.name] + num_resets = len(env_ids) + device = env.device + + # 1. 采样 Euler 角 (roll, pitch, yaw) + # 我们先在 positive range 内采样 + def get_rand(key): + low, high = pose_range[key] + return (high - low) * torch.rand(num_resets, device=device) + low + + roll = get_rand("roll") + pitch = get_rand("pitch") + yaw = (pose_range["yaw"][1] - pose_range["yaw"][0]) * torch.rand(num_resets, device=device) + pose_range["yaw"][0] + + # 2. 核心:随机符号化 (Random Sign) + # 50% 概率保持正,50% 概率变负。Roll 和 Pitch 独立随机。 + roll_sign = (torch.randint(0, 2, (num_resets,), device=device) * 2 - 1).float() + pitch_sign = (torch.randint(0, 2, (num_resets,), device=device) * 2 - 1).float() + + roll *= roll_sign + pitch *= pitch_sign + + # 3. 应用到状态 + quat = math_utils.quat_from_euler_xyz(roll, pitch, yaw) + + # 获取默认状态并叠加随机位置偏移 (x, y, z) + root_states = robot.data.default_root_state[env_ids].clone() + for i, key in enumerate(["x", "y", "z"]): + low, high = pose_range[key] + root_states[:, i] += (high - low) * torch.rand(num_resets, device=device) + low + + root_states[:, 3:7] = quat + + # 4. 写入仿真 (同时清除速度,确保是静态开始) + robot.write_root_state_to_sim(root_states, env_ids) + # --- 2. 配置类 --- T1_JOINT_NAMES = [ @@ -200,18 +257,17 @@ class T1ObservationCfg: @configclass class T1EventCfg: - """随机初始化:支持趴、躺、侧身""" reset_robot_rotation = EventTerm( - func=mdp.reset_root_state_uniform, + func=reset_root_state_symmetric, # 使用上面那个通用的函数 params={ "asset_cfg": SceneEntityCfg("robot"), "pose_range": { - "roll": (-0.2, 0.2),#(-1.57, 1.57), - "pitch": (-1.6, -1.4),#(-1.57, 1.57), - "yaw": (0, 0),#(-3.14, 3.14), + "roll": (0, 1.57), # 左右侧卧 + "pitch": (1.4, 1.6), # 仰卧/俯卧 + "yaw": (-3.14, 3.14), # 全向旋转 "x": (0.0, 0.0), "y": (0.0, 0.0), - "z": (0.0, 0.0), + "z": (0.4, 0.5), }, "velocity_range": {}, }, @@ -238,7 +294,7 @@ class T1GetUpRewardCfg: # 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地 height_with_feet = RewTerm( func=standing_with_feet_reward, - weight=5.0, # 作为核心引导,增加权重 + weight=30.0, # 作为核心引导,增加权重 params={ "min_head_height": 1.10, "min_pelvis_height": 0.65, @@ -250,17 +306,21 @@ class T1GetUpRewardCfg: # 3. 手臂撑地奖:辅助脱离地面阶段 arm_push_support = RewTerm( func=arm_push_up_reward, - weight=3.0, - params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link"])} + weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键 + params={ + "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]), + "height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力 + "min_force": 15.0 # 只要有 15N 的力就触发 + } ) # 4. 惩罚项 undesired_contacts = RewTerm( func=mdp.undesired_contacts, - weight=-5.0, + weight=-10.0, params={ - "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["H2", "Trunk", ".*_Left", ".*_Right"]), - # 注意:这里我们排除了脚部,惩罚大腿/小腿/躯干着地 + "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["H2", "Trunk"]), + # 注意:此处必须排除手臂相关 link,否则手臂用力时会同时被扣分 "threshold": 1.0 } ) diff --git a/rl_game/get_up/env/t1_env.py b/rl_game/get_up/env/t1_env.py index 7e80819..8a96266 100644 --- a/rl_game/get_up/env/t1_env.py +++ b/rl_game/get_up/env/t1_env.py @@ -12,12 +12,20 @@ T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd" @configclass class T1SceneCfg(InteractiveSceneCfg): - """修改后的 T1 机器人场景配置,适配随机姿态起身任务""" + """最终修正版:彻底解决 Unknown asset config type 报错""" - # 1. 地面配置 (如果之前没定义,必须加上) + # 1. 地面配置:直接在 spawn 内部定义材质 ground = AssetBaseCfg( prim_path="/World/ground", - spawn=sim_utils.GroundPlaneCfg(), + spawn=sim_utils.GroundPlaneCfg( + physics_material=sim_utils.RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.3, + friction_combine_mode="average", + restitution_combine_mode="average", + ) + ), ) # 2. 机器人配置 @@ -28,28 +36,25 @@ class T1SceneCfg(InteractiveSceneCfg): activate_contact_sensors=True, rigid_props=sim_utils.RigidBodyPropertiesCfg( disable_gravity=False, - max_depenetration_velocity=10.0, # ⬅️ 提高穿透修正速度,防止肢体卡在地下 + max_depenetration_velocity=10.0, ), articulation_props=sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=True, # 开启自碰撞,起身时肢体可能互碰 - solver_position_iteration_count=8, # ⬅️ 增加迭代次数,提高物理稳定性 + enabled_self_collisions=True, + solver_position_iteration_count=8, solver_velocity_iteration_count=4, ), ), init_state=ArticulationCfg.InitialStateCfg( - # ⬅️ 核心修改:高度降低。因为是躺着生成,0.2m 比较合适 - pos=(0.0, 0.0, 0.3), - # 默认旋转设为单位阵,具体的随机化由 Event 管理器处理 - rot=(1.0, 0.0, 0.0, 0.0), + pos=(0.0, 0.0, 0.4), # 掉落高度 joint_pos={".*": 0.0}, ), actuators={ - "legs": ImplicitActuatorCfg( + "t1_joints": ImplicitActuatorCfg( joint_names_expr=[".*"], - effort_limit=400.0, # T1 通常有较强的力矩输出 - velocity_limit=20.0, # ⬅️ 调高速度限制,起身需要爆发动作 - stiffness=150.0, # ⬅️ 调高刚度:起身需要更“硬”的支撑 - damping=5.0, # ⬅️ 增加阻尼:防止机器人站起瞬间晃动太厉害 + effort_limit=400.0, + velocity_limit=20.0, + stiffness=150.0, + damping=5.0, ), }, )