import torch from isaaclab.assets import ArticulationCfg from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv from isaaclab.managers import ObservationGroupCfg as ObsGroup from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.managers import RewardTermCfg as RewTerm from isaaclab.managers import TerminationTermCfg as DoneTerm from isaaclab.managers import EventTermCfg as EventTerm from isaaclab.envs.mdp import JointPositionActionCfg from isaaclab.managers import SceneEntityCfg from isaaclab.utils import configclass from rl_game.get_up.env.t1_env import T1SceneCfg import isaaclab.envs.mdp as mdp # --- 1. 修正后的自定义 MDP 逻辑函数 --- def is_standing_still( env: ManagerBasedRLEnv, minimum_height: float, max_angle_error: float, standing_time: float, velocity_threshold: float = 0.2 ) -> torch.Tensor: """ 判定判定逻辑:高度达标 + 躯干垂直 + 几乎静止 + 维持时间。 增加了速度判定,彻底杜绝起跳瞬间触发。 """ # 1. 获取 Body 索引 head_idx, _ = env.scene["robot"].find_bodies("H2") # 2. 状态量获取 current_head_height = env.scene["robot"].data.body_state_w[:, head_idx[0], 2] # 投影重力误差(越小越垂直) gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1) # 根部线速度和角速度(判定是否晃动) root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1) root_ang_vel_norm = torch.norm(env.scene["robot"].data.root_ang_vel_w, dim=-1) # 3. 综合判定(这里不强制检查力,改用更稳健的速度限制) is_stable_now = ( (current_head_height > minimum_height) & (gravity_error < max_angle_error) & (root_vel_norm < velocity_threshold) & (root_ang_vel_norm < velocity_threshold * 2.0) ) # 4. 计时器 if "stable_timer" not in env.extras: env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device) dt = env.physics_dt * env.cfg.decimation env.extras["stable_timer"] = torch.where( is_stable_now, env.extras["stable_timer"] + dt, torch.zeros_like(env.extras["stable_timer"]) ) return env.extras["stable_timer"] > standing_time def get_success_reward(env: ManagerBasedRLEnv, term_keys: str) -> torch.Tensor: return env.termination_manager.get_term(term_keys) def root_vel_z_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: # 严厉惩罚 Z 轴正向速度(向上窜) vel_z = env.scene[asset_cfg.name].data.root_lin_vel_w[:, 2] return torch.square(torch.clamp(vel_z, min=0.0)) def feet_airtime_penalty_local( env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, threshold: float = 1.0 ) -> torch.Tensor: """ 自定义滞空惩罚逻辑: 如果脚部的垂直合力小于阈值,说明脚离地了。 返回一个 Tensor,离地时为 1.0,着地时为 0.0。 """ # 1. 获取传感器对象 contact_sensor = env.scene.sensors.get(sensor_cfg.name) if contact_sensor is None: # 如果没搜到传感器,返回全 0,防止程序崩溃 return torch.zeros(env.num_envs, device=env.device) # 2. 获取触地力 (num_envs, num_bodies_in_sensor, 3) # 我们取所有被监测 Body (左右脚) 的 Z 轴推力 # 如果所有脚的力都小于 threshold,判定为“完全腾空” foot_forces_z = contact_sensor.data.net_forces_w[:, :, 2] is_in_air = torch.all(foot_forces_z < threshold, dim=-1) return is_in_air.float() # --- 2. 配置类定义 --- T1_JOINT_NAMES = [ 'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll', 'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch', 'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll' ] @configclass class T1ObservationCfg: """观察值空间配置:严格对应你的 Robot 基类数据结构""" @configclass class PolicyCfg(ObsGroup): concatenate_terms = True enable_corruption = False # --- 状态量 (对应你的 Robot 类属性) --- # 1. 基体线速度 (accelerometer 相关的速度项) base_lin_vel = ObsTerm(func=mdp.base_lin_vel) # 2. 角速度 (对应你的 gyroscope 属性: degrees/s -> IsaacLab 默认为 rad/s) base_ang_vel = ObsTerm(func=mdp.base_ang_vel) # 3. 重力投影 (对应 global_orientation_euler/quat 相关的姿态感知) projected_gravity = ObsTerm(func=mdp.projected_gravity) joint_pos = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)}) joint_vel = ObsTerm(func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)}) actions = ObsTerm(func=mdp.last_action) policy = PolicyCfg() @configclass class T1EventCfg: """重置时的随机姿态:背躺、趴着、侧躺""" reset_robot_rotation = EventTerm( func=mdp.reset_root_state_uniform, params={ "asset_cfg": SceneEntityCfg("robot"), "pose_range": { "roll": (0, 0),#(-1.57, 1.57), "pitch": (-1.57, 1.57),#(-1.57, 1.57), "yaw": (0, 0),#(-3.14, 3.14), "x": (0.0, 0.0), "y": (0.0, 0.0), "z": (0.0, 0.0), }, "velocity_range": {}, }, mode="reset", ) @configclass class T1ActionCfg: """关键修改:降低 scale 让动作变丝滑,增大阻尼效果""" joint_pos = JointPositionActionCfg( asset_name="robot", joint_names=T1_JOINT_NAMES, scale=0.2, # 从 0.5 降到 0.2,防止电机暴力抽搐 use_default_offset=True ) @configclass class T1GetUpRewardCfg: # 1. 姿态奖 (核心引导) upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0) # 2. 只有脚着地时才给的高度奖(模拟逻辑) # 修正:直接使用 root_height 配合强力的速度惩罚 height_tracking = RewTerm( func=mdp.root_height_below_minimum, weight=2.0, params={ "minimum_height": 1.05, "asset_cfg": SceneEntityCfg("robot", body_names="H2"), } ) # 3. 抑制跳跃:严厉惩罚向上窜的速度 root_vel_z_penalty = RewTerm( func=root_vel_z_l2_local, weight=-15.0, # 增大负权重 params={"asset_cfg": SceneEntityCfg("robot")} ) # 4. 抑制滞空 (Airtime Penalty) # 如果脚离开地面,按时间扣分 feet_airtime = RewTerm( func=feet_airtime_penalty_local, weight=-15.0, params={ "sensor_cfg": SceneEntityCfg("feet_contact_sensor"), "threshold": 0.2, # 超过 0.2s 离地就开始扣分 } ) # 5. 动作平滑 (非常重要:解决视频中的高频抖动) action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1.0) joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.1, params={"asset_cfg": SceneEntityCfg("robot")}) # 6. 时间惩罚:逼迫它快点站稳 time_penalty = RewTerm(func=mdp.is_alive, weight=-0.5) # 7. 终极奖励 is_success = RewTerm( func=get_success_reward, weight=200.0, # 成功一次给大奖,但判定条件极严 params={"term_keys": "standing_success"} ) @configclass class T1GetUpTerminationsCfg: time_out = DoneTerm(func=mdp.time_out) # 严格的失败判定:躯干倾斜超过 45 度就重置,不让它在地上滚 base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 0.785}) # 严格的成功判定 standing_success = DoneTerm( func=is_standing_still, params={ "minimum_height": 1.05, # H2 高度 "max_angle_error": 0.15, # 极小角度误差(约 8.6 度) "standing_time": 0.8, # 必须保持 0.8 秒(起跳不可能在空中停这么久) "velocity_threshold": 0.15 # 速度必须极低 } ) @configclass class T1EnvCfg(ManagerBasedRLEnvCfg): scene = T1SceneCfg(num_envs=16384, env_spacing=2.5) # 建议先用 4096 验证逻辑 def __post_init__(self): super().__post_init__() self.scene.robot.init_state.pos = (0.0, 0.0, 0.4) # 初始稍微抬高一点点,防止卡地 observations = T1ObservationCfg() rewards = T1GetUpRewardCfg() terminations = T1GetUpTerminationsCfg() events = T1EventCfg() actions = T1ActionCfg() episode_length_s = 5.0 # 缩短时长,增加训练效率 decimation = 4