128 lines
4.4 KiB
Python
128 lines
4.4 KiB
Python
from isaaclab.assets import ArticulationCfg
|
||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||
from isaaclab.managers import EventTermCfg as EventTerm
|
||
from isaaclab.envs.mdp import JointPositionActionCfg
|
||
from isaaclab.managers import SceneEntityCfg
|
||
from isaaclab.utils import configclass
|
||
from rl_game.get_up.env.t1_env import T1SceneCfg
|
||
|
||
import isaaclab.envs.mdp as mdp
|
||
|
||
|
||
@configclass
|
||
class T1ObservationCfg:
|
||
"""观察值:必须包含姿态和高度,否则机器人不知道自己是怎么躺着的"""
|
||
|
||
@configclass
|
||
class PolicyCfg(ObsGroup):
|
||
concatenate_terms = True
|
||
enable_corruption = False
|
||
|
||
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
|
||
base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
|
||
projected_gravity = ObsTerm(func=mdp.projected_gravity)
|
||
# ⬅️ 新增:让机器人知道自己离地多高
|
||
root_height = ObsTerm(func=mdp.root_pos_w)
|
||
joint_pos = ObsTerm(func=mdp.joint_pos_rel)
|
||
joint_vel = ObsTerm(func=mdp.joint_vel_rel)
|
||
actions = ObsTerm(func=mdp.last_action)
|
||
|
||
policy = PolicyCfg()
|
||
|
||
|
||
@configclass
|
||
class T1EventCfg:
|
||
"""事件配置:实现多种随机躺下姿态的关键"""
|
||
|
||
# 每次重置时,随机给基座一个旋转角度
|
||
# 包括:背躺 (Pitch=1.57), 趴着 (Pitch=-1.57), 侧躺 (Roll=1.57)
|
||
reset_robot_rotation = EventTerm(
|
||
func=mdp.reset_root_state_uniform, # 修正为报错建议的函数名
|
||
params={
|
||
"asset_cfg": SceneEntityCfg("robot"),
|
||
"pose_range": {
|
||
"roll": (-1.57, 1.57), # 侧躺随机
|
||
"pitch": (-1.57, 1.57), # 趴下/背躺随机
|
||
"yaw": (-3.14, 3.14), # 航向角随机
|
||
"x": (0.0, 0.0), # 位置不随机(保持在原点)
|
||
"y": (0.0, 0.0),
|
||
"z": (0.0, 0.0), # 高度由 init_state 决定,这里设为0偏移
|
||
},
|
||
"velocity_range": {}, # 初始速度默认为0
|
||
},
|
||
mode="reset",
|
||
)
|
||
|
||
@configclass
|
||
class T1ActionCfg:
|
||
"""动作空间配置组"""
|
||
# 这里的变量名 (例如 joint_pos) 将决定 ActionManager 中的 term 名称
|
||
joint_pos = JointPositionActionCfg(
|
||
asset_name="robot",
|
||
joint_names=[".*"],
|
||
scale=0.5,
|
||
use_default_offset=True
|
||
)
|
||
|
||
@configclass
|
||
class T1GetUpRewardCfg:
|
||
"""起身任务专用奖励:快、直、稳"""
|
||
|
||
# 1. 核心进度:高度奖励
|
||
# 只要低于目标高度(0.6m),就会持续根据距离给负分,越高分数越接近0
|
||
height_progress = RewTerm(
|
||
func=mdp.root_height_below_minimum,
|
||
weight=15.0,
|
||
params={"minimum_height": 0.65}
|
||
)
|
||
|
||
# 2. 速度惩罚:时间就是金钱
|
||
# 每一帧都扣 0.5 分,起身越慢,被扣掉的总分就越多
|
||
time_penalty = RewTerm(
|
||
func=mdp.is_alive,
|
||
weight=-0.5
|
||
)
|
||
|
||
# 3. 姿态奖励:上半身必须垂直向上
|
||
upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0)
|
||
|
||
# 4. 平滑惩罚:防止起身时由于急躁导致的剧烈抖动
|
||
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
|
||
joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001)
|
||
|
||
|
||
@configclass
|
||
class T1TerminationsCfg:
|
||
"""终止条件"""
|
||
# 提前成功:如果站起来了并保持稳定,直接结束 Episode 并拿奖励,节省算力
|
||
is_standing_success = DoneTerm(
|
||
func=mdp.root_height_below_minimum,
|
||
params={"minimum_height": 0.65},
|
||
)
|
||
time_out = DoneTerm(func=mdp.time_out)
|
||
|
||
|
||
@configclass
|
||
class T1EnvCfg(ManagerBasedRLEnvCfg):
|
||
"""主环境配置:针对 T1 起身任务"""
|
||
scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)
|
||
|
||
# 覆盖初始位置:必须低高度生成,防止随机旋转后高空坠落
|
||
def __post_init__(self):
|
||
super().__post_init__()
|
||
self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)
|
||
|
||
observations = T1ObservationCfg()
|
||
rewards = T1GetUpRewardCfg()
|
||
terminations = T1TerminationsCfg()
|
||
events = T1EventCfg() # ⬅️ 挂载随机姿态事件
|
||
|
||
actions = T1ActionCfg()
|
||
|
||
# 训练参数
|
||
episode_length_s = 5.0 # 强制 5秒内必须站起来
|
||
decimation = 4 # 提高控制频率 (约 50Hz) 有助于精细动作 |