The demo of get up
This commit is contained in:
116
rl_game/get_up/config/t1_env_cfg.py
Normal file
116
rl_game/get_up/config/t1_env_cfg.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from isaaclab.assets import ArticulationCfg
|
||||
from isaaclab.envs import ManagerBasedRLEnvCfg
|
||||
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
||||
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
||||
from isaaclab.managers import RewardTermCfg as RewTerm
|
||||
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
||||
from isaaclab.managers import EventTermCfg as EventTerm
|
||||
from isaaclab.envs.mdp import JointPositionActionCfg
|
||||
from isaaclab.managers import SceneEntityCfg
|
||||
from isaaclab.utils import configclass
|
||||
from rl_game.get_up.env.t1_env import T1SceneCfg
|
||||
|
||||
import isaaclab.envs.mdp as mdp
|
||||
|
||||
|
||||
@configclass
class T1ObservationCfg:
    """Observation groups for the T1 get-up task.

    The policy has to observe both the body orientation and the root height;
    without them the robot cannot tell in which pose it is lying.
    """

    @configclass
    class PolicyCfg(ObsGroup):
        """Observation terms that make up the policy input."""

        # Join all terms into one flat observation vector.
        concatenate_terms = True
        # Observation corruption (noise) is switched off for this group.
        enable_corruption = False

        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        # Lets the robot know how high its root is above the ground.
        # `isaaclab.envs.mdp` exposes the root height observation as
        # `base_pos_z`; it has no `root_height` function.
        root_height = ObsTerm(func=mdp.base_pos_z)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel)
        joint_vel = ObsTerm(func=mdp.joint_vel_rel)
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()
|
||||
|
||||
|
||||
@configclass
class T1EventCfg:
    """Event configuration: the key to starting from many random lying poses."""

    # On every reset, apply a random rotation to the base so that episodes
    # cover (per the original author's notes) lying on the back
    # (pitch = 1.57), lying face-down (pitch = -1.57) and lying on the side
    # (roll = 1.57).
    #
    # `reset_root_state_uniform` is the stock Isaac Lab reset event; it takes
    # the sampled offsets under "pose_range" and also requires a
    # "velocity_range" argument. Keys that are left out are sampled from
    # (0.0, 0.0), i.e. no position offset and zero root velocity here.
    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "pose_range": {
                "roll": (-1.57, 1.57),  # covers lying on either side
                "pitch": (-1.57, 1.57),  # covers face-down and on-the-back
                "yaw": (-3.14, 3.14),  # random heading
            },
            "velocity_range": {},
            "asset_cfg": SceneEntityCfg("robot"),
        },
    )
|
||||
|
||||
|
||||
@configclass
class T1GetUpRewardCfg:
    """Reward terms for the get-up task: fast, upright, smooth.

    Every term below is a cost (non-negative function, negative weight), so
    the best achievable return is the one with the least accumulated penalty.
    """

    # 1. Core progress: height shaping.
    # Penalise the squared distance between the root height and the 0.65 m
    # target, so the penalty shrinks towards 0 as the robot rises.
    # (The previous term used the boolean termination check
    # `root_height_below_minimum` with a positive weight, which paid the
    # robot for staying on the ground.)
    height_progress = RewTerm(
        func=mdp.base_height_l2,
        weight=-15.0,
        params={"target_height": 0.65},
    )

    # 2. Time penalty: time is money.
    # A constant cost for every step the episode is still running, so a slower
    # get-up accumulates a larger total penalty.
    time_penalty = RewTerm(func=mdp.is_alive, weight=-0.5)

    # 3. Posture: the torso has to point straight up.
    # `flat_orientation_l2` is a penalty that is zero when the base is level,
    # so it needs a negative weight.
    upright = RewTerm(func=mdp.flat_orientation_l2, weight=-2.0)

    # 4. Smoothness: discourage violent jitter caused by rushing the motion.
    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
    joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001)
|
||||
|
||||
|
||||
def root_height_above_minimum(env, minimum_height: float, asset_cfg=SceneEntityCfg("robot")):
    """Flag environments whose robot root is higher than ``minimum_height``.

    Mirror image of ``mdp.root_height_below_minimum``, which has no option to
    invert its comparison.

    Args:
        env: The environment that owns the scene.
        minimum_height: Height threshold, compared against the z component of
            the root position.
        asset_cfg: Scene entity to inspect; defaults to the ``"robot"`` asset.

    Returns:
        A boolean tensor with one entry per environment, ``True`` where the
        root is above the threshold.
    """
    asset = env.scene[asset_cfg.name]
    return asset.data.root_pos_w[:, 2] > minimum_height


@configclass
class T1TerminationsCfg:
    """Termination conditions for the get-up task."""

    # Early success: once the root rises above 0.65 m the episode ends and the
    # environment is reset, which saves compute on already-solved episodes.
    is_standing_success = DoneTerm(
        func=root_height_above_minimum,
        params={"minimum_height": 0.65},
    )
    # Flag the episode-length limit as a time-out (truncation) so that it is
    # not treated as a failure termination.
    time_out = DoneTerm(func=mdp.time_out, time_out=True)
|
||||
|
||||
|
||||
@configclass
class T1ActionCfg:
    """Action terms for the T1 robot.

    The action manager expects a container whose attributes are action terms,
    so the joint-position term is wrapped here instead of being assigned to
    the environment's ``actions`` field directly.
    """

    joint_pos = JointPositionActionCfg(
        asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
    )


@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Main environment configuration for the T1 get-up task."""

    scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)

    observations = T1ObservationCfg()
    actions = T1ActionCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1TerminationsCfg()
    events = T1EventCfg()  # attaches the random-pose reset event

    # Training parameters
    episode_length_s = 5.0  # the robot must be standing within 5 seconds
    decimation = 4  # 4 physics steps per action; 50 Hz with the dt set below

    def __post_init__(self):
        """Apply overrides that depend on the instantiated sub-configs."""
        super().__post_init__()
        # Spawn close to the ground so that the randomly rotated robot does
        # not drop from a height after the reset.
        self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)
        # 0.005 s physics step * decimation 4 = 0.02 s per action (50 Hz),
        # the control rate this configuration is meant to run at.
        self.sim.dt = 0.005
|
||||
Reference in New Issue
Block a user