"""Isaac Lab manager-based RL environment configuration for the T1 get-up task."""
|
|
from isaaclab.assets import ArticulationCfg
|
|||
|
|
from isaaclab.envs import ManagerBasedRLEnvCfg
|
|||
|
|
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
|||
|
|
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
|||
|
|
from isaaclab.managers import RewardTermCfg as RewTerm
|
|||
|
|
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
|||
|
|
from isaaclab.managers import EventTermCfg as EventTerm
|
|||
|
|
from isaaclab.envs.mdp import JointPositionActionCfg
|
|||
|
|
from isaaclab.managers import SceneEntityCfg
|
|||
|
|
from isaaclab.utils import configclass
|
|||
|
|
from rl_game.get_up.env.t1_env import T1SceneCfg
|
|||
|
|
|
|||
|
|
import isaaclab.envs.mdp as mdp
|
|||
|
|
|
|||
|
|
|
|||
|
|
@configclass
class T1ObservationCfg:
    """Observation groups for the T1 get-up task.

    The policy has to observe both the body orientation and the root height;
    without them it cannot tell in which pose the robot is lying.
    """

    @configclass
    class PolicyCfg(ObsGroup):
        """Observation terms that are fed to the policy."""

        concatenate_terms = True
        enable_corruption = False

        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        # Lets the robot know how high its root is above the ground.
        # The stock ``isaaclab.envs.mdp`` module exposes the root height as
        # ``base_pos_z``; it has no ``root_height`` observation term.
        root_height = ObsTerm(func=mdp.base_pos_z)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel)
        joint_vel = ObsTerm(func=mdp.joint_vel_rel)
        actions = ObsTerm(func=mdp.last_action)

    policy: PolicyCfg = PolicyCfg()
|
|||
|
|
|
|||
|
|
|
|||
|
|
@configclass
class T1EventCfg:
    """Event configuration: randomises the lying pose at every reset."""

    # On every reset the base is rotated by a random amount. The ranges cover
    # lying on the back (pitch = 1.57), face down (pitch = -1.57) and lying on
    # the side (roll = +/-1.57).
    #
    # ``reset_root_state_uniform`` is the stock Isaac Lab reset event; it takes
    # the sampled offsets through ``pose_range`` and additionally requires a
    # ``velocity_range`` (left empty here, so no velocity offset is sampled).
    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "pose_range": {
                "roll": (-1.57, 1.57),  # covers lying on either side
                "pitch": (-1.57, 1.57),  # covers face-down and on-the-back
                "yaw": (-3.14, 3.14),  # random heading
            },
            "velocity_range": {},
            "asset_cfg": SceneEntityCfg("robot"),
        },
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
@configclass
class T1GetUpRewardCfg:
    """Rewards for the get-up task: stand up fast, straight and smoothly."""

    # 1. Core progress: height term.
    # Penalises the squared distance between the root height and the 0.65 m
    # target, so the penalty approaches zero as the robot rises. The weight is
    # negative because ``base_height_l2`` is a penalty kernel. (The previous
    # setup applied a positive weight to the boolean ``root_height_below_minimum``
    # check, which rewarded staying on the ground.)
    height_progress = RewTerm(
        func=mdp.base_height_l2,
        weight=-15.0,
        params={"target_height": 0.65},
    )

    # 2. Time penalty: a negative reward on every step the episode is still
    # running, so slower get-ups accumulate a larger total penalty.
    time_penalty = RewTerm(func=mdp.is_alive, weight=-0.5)

    # 3. Posture: the torso should point straight up. ``flat_orientation_l2``
    # grows with tilt, so it needs a negative weight to encourage uprightness.
    upright = RewTerm(func=mdp.flat_orientation_l2, weight=-2.0)

    # 4. Smoothness: discourage violent jitter caused by hasty motions.
    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
    joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def root_height_above_minimum(env, minimum_height: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")):
    """Flag environments whose asset root is higher than ``minimum_height``.

    Counterpart of the stock ``mdp.root_height_below_minimum`` termination,
    which only supports the "below" comparison and accepts no ``inverted``
    argument.

    Args:
        env: The environment that owns the scene.
        minimum_height: Height threshold compared against the root z position.
        asset_cfg: Scene entity to check. Defaults to the ``"robot"`` asset.

    Returns:
        Per-environment boolean flags, True where the root z position exceeds
        ``minimum_height``.
    """
    asset = env.scene[asset_cfg.name]
    return asset.data.root_pos_w[:, 2] > minimum_height


@configclass
class T1TerminationsCfg:
    """Termination conditions for the get-up task."""

    # Early success: end the episode as soon as the root rises above 0.65 m,
    # which saves compute.
    # NOTE(review): only the height is checked; no stability condition is
    # enforced here.
    is_standing_success = DoneTerm(
        func=root_height_above_minimum,
        params={"minimum_height": 0.65},
    )
    # Flagged as a time-out so it is reported as a truncation rather than a
    # terminal state.
    time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
|||
|
|
|
|||
|
|
|
|||
|
|
@configclass
class T1ActionsCfg:
    """Action terms for the T1 get-up task.

    The action manager iterates over the members of the ``actions`` config and
    expects each one to be an action-term config, so the joint-position term
    has to live inside a container class instead of being assigned directly.
    """

    joint_pos = JointPositionActionCfg(
        asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
    )


@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Main environment configuration for the T1 get-up task."""

    scene: T1SceneCfg = T1SceneCfg(num_envs=16384, env_spacing=2.5)

    observations: T1ObservationCfg = T1ObservationCfg()
    actions: T1ActionsCfg = T1ActionsCfg()
    rewards: T1GetUpRewardCfg = T1GetUpRewardCfg()
    terminations: T1TerminationsCfg = T1TerminationsCfg()
    events: T1EventCfg = T1EventCfg()  # attaches the random-pose reset event

    # Training parameters.
    episode_length_s = 5.0  # the robot has to stand up within 5 seconds
    # Intended to give a higher (~50 Hz) control rate for fine motions.
    # NOTE(review): that rate only holds if the simulation dt is 0.005 s;
    # confirm the sim settings, which are not set in this class.
    decimation = 4

    def __post_init__(self):
        """Override the spawn position after the base config is initialised."""
        super().__post_init__()
        # Spawn close to the ground so the randomly rotated robot does not
        # drop from a height.
        self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)
|