Files
Gym_GPU/rl_game/get_up/config/t1_env_cfg.py

244 lines
8.4 KiB
Python
Raw Normal View History

import torch
2026-03-16 05:00:20 -04:00
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
2026-03-16 05:00:20 -04:00
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg
import isaaclab.envs.mdp as mdp
# --- 1. 自定义 MDP 逻辑函数 (放在配置类之前) ---
def is_standing_still(
env: ManagerBasedRLEnv,
minimum_height: float,
max_angle_error: float,
standing_time: float
) -> torch.Tensor:
"""
判定机器人是否稳定站立了一段时间
逻辑高度达标且姿态垂直持续时间超过 standing_time 则返回 True
"""
# 获取当前状态:高度 (Z轴) 和 投影重力 (前两个分量越小越垂直)
current_height = env.scene["robot"].data.root_pos_w[:, 2]
gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
# 判断当前时刻是否“达标”
is_stable_now = (current_height > minimum_height) & (gravity_error < max_angle_error)
# 在 env.extras 中维护一个计时器
if "stable_timer" not in env.extras:
env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device)
# 核心计时逻辑:达标累加 dt不达标清零
# dt = physics_dt * decimation (即控制频率的步长)
dt = env.physics_dt * env.cfg.decimation
env.extras["stable_timer"] = torch.where(
is_stable_now,
env.extras["stable_timer"] + dt,
torch.zeros_like(env.extras["stable_timer"])
)
# 判定是否达到目标时长
return env.extras["stable_timer"] > standing_time
def get_success_reward(env: ManagerBasedRLEnv, term_keys: str) -> torch.Tensor:
"""检查是否触发了特定的成功终止条件"""
return env.termination_manager.get_term(term_keys)
def root_vel_z_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""专门惩罚 Z 轴方向的速度"""
# 获取根部速度 (num_envs, 3) -> [vx, vy, vz]
vel = env.scene[asset_cfg.name].data.root_lin_vel_w
# 只取 Z 轴vel[:, 2]
return torch.square(vel[:, 2])
def joint_torques_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""计算机器人所有关节施加扭矩的平方和"""
# 从 data.applied_torques 获取数据,通常形状为 (num_envs, num_joints)
torques = env.scene[asset_cfg.name].data.applied_torque
return torch.sum(torch.square(torques), dim=-1)
def joint_vel_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""计算机器人所有关节速度的平方和"""
# 从 data.joint_vel 获取数据
vel = env.scene[asset_cfg.name].data.joint_vel
return torch.sum(torch.square(vel), dim=-1)
# --- 2. 配置类定义 ---
## 1. 定义与你的类一致的关节列表 (按照 ROBOT_MOTORS 的顺序)
T1_JOINT_NAMES = [
'Left_Hip_Pitch', 'Right_Hip_Pitch',
'Left_Hip_Roll', 'Right_Hip_Roll',
'Left_Hip_Yaw', 'Right_Hip_Yaw',
'Left_Knee_Pitch', 'Right_Knee_Pitch',
'Left_Ankle_Pitch', 'Right_Ankle_Pitch',
'Left_Ankle_Roll', 'Right_Ankle_Roll'
]
2026-03-16 05:00:20 -04:00
@configclass
class T1ObservationCfg:
"""观察值空间配置:严格对应你的 Robot 基类数据结构"""
2026-03-16 05:00:20 -04:00
@configclass
class PolicyCfg(ObsGroup):
concatenate_terms = True
enable_corruption = False
# --- 状态量 (对应你的 Robot 类属性) ---
# 1. 基体线速度 (accelerometer 相关的速度项)
2026-03-16 05:00:20 -04:00
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
# 2. 角速度 (对应你的 gyroscope 属性: degrees/s -> IsaacLab 默认为 rad/s)
2026-03-16 05:00:20 -04:00
base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
# 3. 重力投影 (对应 global_orientation_euler/quat 相关的姿态感知)
2026-03-16 05:00:20 -04:00
projected_gravity = ObsTerm(func=mdp.projected_gravity)
# 4. 关节位置 (对应 motor_positions)
# 使用 joint_pos_rel 获取相对于默认姿态的偏差,显式指定关节顺序
joint_pos = ObsTerm(
func=mdp.joint_pos_rel,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)}
)
# 5. 关节速度 (对应 motor_speeds)
joint_vel = ObsTerm(
func=mdp.joint_vel_rel,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)}
)
# 6. 上一次的动作 (对应 motor_targets)
2026-03-16 05:00:20 -04:00
actions = ObsTerm(func=mdp.last_action)
policy = PolicyCfg()
@configclass
class T1EventCfg:
"""重置时的随机姿态:背躺、趴着、侧躺"""
2026-03-16 05:00:20 -04:00
reset_robot_rotation = EventTerm(
func=mdp.reset_root_state_uniform,
2026-03-16 05:00:20 -04:00
params={
"asset_cfg": SceneEntityCfg("robot"),
2026-03-16 05:46:49 -04:00
"pose_range": {
"roll": (-1.57, 1.57),
"pitch": (-1.57, 1.57),
"yaw": (-3.14, 3.14),
"x": (0.0, 0.0),
2026-03-16 05:46:49 -04:00
"y": (0.0, 0.0),
"z": (0.0, 0.0),
2026-03-16 05:46:49 -04:00
},
"velocity_range": {},
2026-03-16 05:00:20 -04:00
},
mode="reset",
)
2026-03-16 05:46:49 -04:00
@configclass
class T1ActionCfg:
"""动作空间"""
2026-03-16 05:46:49 -04:00
joint_pos = JointPositionActionCfg(
asset_name="robot",
joint_names=T1_JOINT_NAMES,
2026-03-16 05:46:49 -04:00
scale=0.5,
use_default_offset=True
)
2026-03-16 05:00:20 -04:00
2026-03-16 05:00:20 -04:00
@configclass
class T1GetUpRewardCfg:
"""优化后的奖励函数:抑制跳跃,引导稳健起身"""
# 1. 高度引导 (改为平滑的指数奖励)
# 相比 root_height_below_minimum这个函数会让机器人越接近目标高度得分越高且曲线平稳
height_tracking = RewTerm(
func=mdp.root_height_below_minimum, # 如果没有自定义函数,保留这个但调低权重
weight=5.0, # 降低权重,防止“弹射”
2026-03-16 05:00:20 -04:00
params={"minimum_height": 0.65}
)
# 2. 姿态奖 (保持不变,这是核心)
2026-03-16 05:00:20 -04:00
upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0)
# 3. 稳定性引导 (增加对速度的惩罚,抑制跳跃)
# 惩罚过大的垂直速度,防止“跳起”
root_vel_z_penalty = RewTerm(
func=root_vel_z_l2_local, # 使用本地函数
weight=-2.0,
params={"asset_cfg": SceneEntityCfg("robot")} # 传入资产配置
)
# 4. 关节与能量约束 (防止 NaN 和乱跳的关键)
joint_vel = RewTerm(
func=joint_vel_l2_local,
weight=-0.005,
params={"asset_cfg": SceneEntityCfg("robot")}
)
applied_torque = RewTerm(
func=joint_torques_l2_local,
weight=-1.0e-5,
params={"asset_cfg": SceneEntityCfg("robot")}
)
# 5. 动作平滑 (非常重要)
action_rate = RewTerm(
func=mdp.action_rate_l2,
weight=-0.05 # 增大权重,强制动作连贯
)
# 6. 核心终点奖励
is_success = RewTerm(
func=get_success_reward,
weight=1000.0, # 成功奖励可以给高点,但前提是动作要平稳
params={"term_keys": "standing_success"}
)
2026-03-16 05:00:20 -04:00
# 7. 生存奖励 (保持微小正值即可)
is_alive = RewTerm(func=mdp.is_alive, weight=0.1)
2026-03-16 05:00:20 -04:00
@configclass
class T1GetUpTerminationsCfg:
"""终止条件:站稳即算任务完成"""
# 失败:跌倒
base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 1.0})
# 成功:满足高度和角度要求,且维持 1.0 秒
standing_success = DoneTerm(
func=is_standing_still,
params={
"minimum_height": 0.63,
"max_angle_error": 0.15,
"standing_time": 1.0
}
2026-03-16 05:00:20 -04:00
)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
"""主环境配置"""
2026-03-16 05:00:20 -04:00
scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)
def __post_init__(self):
super().__post_init__()
# 初始高度设低,配合随机旋转事件实现“从地上爬起来”
2026-03-16 05:00:20 -04:00
self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)
observations = T1ObservationCfg()
rewards = T1GetUpRewardCfg()
terminations = T1GetUpTerminationsCfg()
events = T1EventCfg()
2026-03-16 05:46:49 -04:00
actions = T1ActionCfg()
2026-03-16 05:00:20 -04:00
episode_length_s = 5.0 # 5秒强制重置
decimation = 4 # 控制频率