from isaaclab.assets import ArticulationCfg from isaaclab.envs import ManagerBasedRLEnvCfg from isaaclab.managers import ObservationGroupCfg as ObsGroup from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.managers import RewardTermCfg as RewTerm from isaaclab.managers import TerminationTermCfg as DoneTerm from isaaclab.managers import EventTermCfg as EventTerm from isaaclab.envs.mdp import JointPositionActionCfg from isaaclab.managers import SceneEntityCfg from isaaclab.utils import configclass from rl_game.get_up.env.t1_env import T1SceneCfg import isaaclab.envs.mdp as mdp @configclass class T1ObservationCfg: """观察值:必须包含姿态和高度,否则机器人不知道自己是怎么躺着的""" @configclass class PolicyCfg(ObsGroup): concatenate_terms = True enable_corruption = False base_lin_vel = ObsTerm(func=mdp.base_lin_vel) base_ang_vel = ObsTerm(func=mdp.base_ang_vel) projected_gravity = ObsTerm(func=mdp.projected_gravity) # ⬅️ 新增:让机器人知道自己离地多高 root_height = ObsTerm(func=mdp.root_pos_w) joint_pos = ObsTerm(func=mdp.joint_pos_rel) joint_vel = ObsTerm(func=mdp.joint_vel_rel) actions = ObsTerm(func=mdp.last_action) policy = PolicyCfg() @configclass class T1EventCfg: """事件配置:实现多种随机躺下姿态的关键""" # 每次重置时,随机给基座一个旋转角度 # 包括:背躺 (Pitch=1.57), 趴着 (Pitch=-1.57), 侧躺 (Roll=1.57) reset_robot_rotation = EventTerm( func=mdp.reset_root_state_uniform, # 修正为报错建议的函数名 params={ "asset_cfg": SceneEntityCfg("robot"), "pose_range": { "roll": (-1.57, 1.57), # 侧躺随机 "pitch": (-1.57, 1.57), # 趴下/背躺随机 "yaw": (-3.14, 3.14), # 航向角随机 "x": (0.0, 0.0), # 位置不随机(保持在原点) "y": (0.0, 0.0), "z": (0.0, 0.0), # 高度由 init_state 决定,这里设为0偏移 }, "velocity_range": {}, # 初始速度默认为0 }, mode="reset", ) @configclass class T1ActionCfg: """动作空间配置组""" # 这里的变量名 (例如 joint_pos) 将决定 ActionManager 中的 term 名称 joint_pos = JointPositionActionCfg( asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True ) @configclass class T1GetUpRewardCfg: """起身任务专用奖励:快、直、稳""" # 1. 核心进度:高度奖励 # 只要低于目标高度(0.6m),就会持续根据距离给负分,越高分数越接近0 height_progress = RewTerm( func=mdp.root_height_below_minimum, weight=15.0, params={"minimum_height": 0.65} ) # 2. 速度惩罚:时间就是金钱 # 每一帧都扣 0.5 分,起身越慢,被扣掉的总分就越多 time_penalty = RewTerm( func=mdp.is_alive, weight=-0.5 ) # 3. 姿态奖励:上半身必须垂直向上 upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0) # 4. 平滑惩罚:防止起身时由于急躁导致的剧烈抖动 action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01) joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001) @configclass class T1TerminationsCfg: """终止条件""" # 提前成功:如果站起来了并保持稳定,直接结束 Episode 并拿奖励,节省算力 is_standing_success = DoneTerm( func=mdp.root_height_below_minimum, params={"minimum_height": 0.65}, ) time_out = DoneTerm(func=mdp.time_out) @configclass class T1EnvCfg(ManagerBasedRLEnvCfg): """主环境配置:针对 T1 起身任务""" scene = T1SceneCfg(num_envs=16384, env_spacing=2.5) # 覆盖初始位置:必须低高度生成,防止随机旋转后高空坠落 def __post_init__(self): super().__post_init__() self.scene.robot.init_state.pos = (0.0, 0.0, 0.2) observations = T1ObservationCfg() rewards = T1GetUpRewardCfg() terminations = T1TerminationsCfg() events = T1EventCfg() # ⬅️ 挂载随机姿态事件 actions = T1ActionCfg() # 训练参数 episode_length_s = 5.0 # 强制 5秒内必须站起来 decimation = 4 # 提高控制频率 (约 50Hz) 有助于精细动作