Files
Gym_GPU/rl_game/get_up/config/t1_env_cfg.py

292 lines
10 KiB
Python
Raw Normal View History

import torch

from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
import isaaclab.envs.mdp as mdp

from rl_game.get_up.env.t1_env import T1SceneCfg


# --- 1. Custom MDP logic functions ---
def standing_with_feet_reward(
    env: ManagerBasedRLEnv,
    min_head_height: float,
    min_pelvis_height: float,
    sensor_cfg: SceneEntityCfg,
    force_threshold: float = 30.0,
) -> torch.Tensor:
    """Dual-height reward gated on foot contact.

    The height reward is granted only while the feet press on the ground AND
    it scales with how close the head and trunk are to their target heights.

    Args:
        env: The running RL environment.
        min_head_height: Head height (m) at which the head term saturates.
        min_pelvis_height: Trunk height (m) at which the pelvis term saturates.
        sensor_cfg: Contact-sensor entity monitoring the feet.
        force_threshold: Summed vertical foot force (N) above which the feet
            count as "on the ground".

    Returns:
        Per-env reward tensor; zero whenever the feet are airborne.
    """
    # 1. Foot-contact gate: sum vertical forces over all monitored bodies.
    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
    if contact_sensor is None:
        # Missing sensor: degrade to zeros instead of crashing
        # (consistent with arm_push_up_reward below).
        return torch.zeros(env.num_envs, device=env.device)
    foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
    is_feet_on_ground = foot_forces_z > force_threshold

    # 2. Resolve head / trunk body indices and read their world-frame heights.
    head_idx, _ = env.scene["robot"].find_bodies("H2")
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]

    # 3. Height attainment ratios (head may overshoot slightly, up to 1.2).
    head_reward = torch.clamp(current_head_h / min_head_height, max=1.2)
    pelvis_reward = torch.clamp(current_pelvis_h / min_pelvis_height, max=1.0)

    # Combined height reward (mean of the two terms).
    combined_height_reward = (head_reward + pelvis_reward) / 2.0

    # 4. Logic gate: no reward while the feet are off the ground.
    return torch.where(
        is_feet_on_ground, combined_height_reward, torch.zeros_like(combined_height_reward)
    )
def arm_push_up_reward(
    env: ManagerBasedRLEnv,
    sensor_cfg: SceneEntityCfg,
    height_threshold: float = 0.6,
) -> torch.Tensor:
    """Arm-support reward: encourages pushing off the ground with the arms.

    Helps the robot rise from a prone/supine pose by rewarding arm contact
    force, but only while the trunk is still below ``height_threshold`` (m).

    Args:
        env: The running RL environment.
        sensor_cfg: Contact-sensor entity monitoring the hands/arms.
        height_threshold: Trunk height above which the reward is cut off.

    Returns:
        Per-env reward tensor; zero once the trunk is high enough.
    """
    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
    if contact_sensor is None:
        return torch.zeros(env.num_envs, device=env.device)

    # Strongest vertical contact force among the monitored arm bodies.
    arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
    max_arm_force = torch.max(arm_forces_z, dim=-1)[0]

    # Reward arm support only while the trunk is still low.
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
    current_height = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]

    # Saturate at 200 N, scaled so that 100 N of push yields reward 1.0.
    pushing_reward = torch.clamp(max_arm_force, max=200.0) / 100.0
    return torch.where(
        current_height < height_threshold, pushing_reward, torch.zeros_like(pushing_reward)
    )
def is_standing_still(
    env: ManagerBasedRLEnv,
    min_head_height: float,
    min_pelvis_height: float,
    max_angle_error: float,
    standing_time: float,
    velocity_threshold: float = 0.15,
) -> torch.Tensor:
    """Success check: both heights reached + upright trunk + body at rest.

    An environment succeeds once it has been continuously stable for more
    than ``standing_time`` seconds.

    Args:
        env: The running RL environment.
        min_head_height: Minimum head height (m).
        min_pelvis_height: Minimum trunk height (m).
        max_angle_error: Max XY magnitude of base-frame projected gravity
            (0 means perfectly upright).
        standing_time: Required continuous stable duration (s).
        velocity_threshold: Max root linear speed (m/s) to count as still.

    Returns:
        Per-env boolean tensor, True where the success condition holds.
    """
    head_idx, _ = env.scene["robot"].find_bodies("H2")
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")

    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]

    # Tilt error: XY magnitude of gravity projected into the base frame.
    gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
    root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)

    # Instantaneous stability: head high AND pelvis high AND upright AND slow.
    is_stable_now = (
        (current_head_h > min_head_height)
        & (current_pelvis_h > min_pelvis_height)
        & (gravity_error < max_angle_error)
        & (root_vel_norm < velocity_threshold)
    )

    # Accumulate how long each env has been continuously stable; any unstable
    # step resets that env's timer to zero.
    # NOTE(review): the timer is NOT cleared on episode reset, so an env that
    # resets mid-streak may carry a stale value — verify reset handling.
    if "stable_timer" not in env.extras:
        env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device)
    dt = env.physics_dt * env.cfg.decimation
    env.extras["stable_timer"] = torch.where(
        is_stable_now,
        env.extras["stable_timer"] + dt,
        torch.zeros_like(env.extras["stable_timer"]),
    )
    return env.extras["stable_timer"] > standing_time
def feet_airtime_penalty_local(
    env: ManagerBasedRLEnv,
    sensor_cfg: SceneEntityCfg,
    threshold: float = 1.0,
) -> torch.Tensor:
    """Airtime penalty signal: 1.0 while ALL monitored feet are airborne.

    A foot counts as airborne when its vertical contact force is below
    ``threshold`` (N); the signal is 0.0 as soon as any foot touches down.

    Args:
        env: The running RL environment.
        sensor_cfg: Contact-sensor entity monitoring the feet.
        threshold: Vertical force (N) below which a foot counts as airborne.

    Returns:
        Per-env float tensor: 1.0 fully airborne, 0.0 otherwise.
    """
    # 1. The sensor may be absent in some scene configs; return zeros
    #    instead of crashing.
    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
    if contact_sensor is None:
        return torch.zeros(env.num_envs, device=env.device)

    # 2. Vertical contact forces, shape (num_envs, num_monitored_bodies).
    foot_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
    # Fully airborne only if EVERY monitored foot is below the threshold.
    is_in_air = torch.all(foot_forces_z < threshold, dim=-1)
    return is_in_air.float()
def root_vel_z_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Squared penalty on *upward* root velocity (suppresses jumping).

    Downward velocity is clamped away so that falling is not penalized.

    Args:
        env: The running RL environment.
        asset_cfg: The asset (robot) whose root velocity is penalized.

    Returns:
        Per-env tensor of squared positive Z velocity.
    """
    vel_z = env.scene[asset_cfg.name].data.root_lin_vel_w[:, 2]
    return torch.square(torch.clamp(vel_z, min=0.0))
# --- 2. Configuration classes ---

# Actuated leg joints of the T1 robot (hip pitch/roll/yaw, knee, ankle
# pitch/roll for each side). Shared by the observation and action configs so
# that joint ordering stays consistent.
T1_JOINT_NAMES = [
    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
]
@configclass
class T1ObservationCfg:
    """Observation specification: a single concatenated policy group."""

    @configclass
    class PolicyCfg(ObsGroup):
        # Flatten all terms into one observation vector.
        concatenate_terms = True

        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        joint_pos = ObsTerm(
            func=mdp.joint_pos_rel,
            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)},
        )
        joint_vel = ObsTerm(
            func=mdp.joint_vel_rel,
            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)},
        )
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()
@configclass
class T1EventCfg:
    """Randomized initial pose: supports prone, supine, and sideways starts."""

    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        params={
            "asset_cfg": SceneEntityCfg("robot"),
            "pose_range": {
                # Currently only pitch is randomized; the commented ranges are
                # the full prone/supine/sideways randomization.
                "roll": (0, 0),        # (-1.57, 1.57)
                "pitch": (-1.57, 1.57),
                "yaw": (0, 0),         # (-3.14, 3.14)
                "x": (0.0, 0.0),
                "y": (0.0, 0.0),
                "z": (0.0, 0.0),
            },
            "velocity_range": {},
        },
        mode="reset",
    )
@configclass
class T1ActionCfg:
    """Joint-position actions. The action scale was reduced (0.5 -> 0.2) to
    smooth motor commands and avoid violent twitching."""

    joint_pos = JointPositionActionCfg(
        asset_name="robot",
        joint_names=T1_JOINT_NAMES,
        scale=0.2,  # lowered from 0.5 to prevent violent motor jerks
        use_default_offset=True,
    )
@configclass
class T1GetUpRewardCfg:
    """Reward terms for the get-up task."""

    # 1. Base posture reward (guides the body toward upright).
    upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)

    # 2. Conditional height reward: dual height check (head + pelvis),
    #    granted only while the feet are on the ground.
    height_with_feet = RewTerm(
        func=standing_with_feet_reward,
        weight=5.0,  # core guidance signal, hence the large weight
        params={
            "min_head_height": 1.10,
            "min_pelvis_height": 0.65,
            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
            "force_threshold": 30.0,
        },
    )

    # 3. Arm-support reward: helps during the lift-off-the-ground phase.
    arm_push_support = RewTerm(
        func=arm_push_up_reward,
        weight=3.0,
        params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link"])},
    )

    # 4. Penalty: undesired body-ground contacts. Feet are excluded; this
    #    penalizes thighs/shins/trunk/head touching the ground.
    undesired_contacts = RewTerm(
        func=mdp.undesired_contacts,
        weight=-5.0,
        params={
            "sensor_cfg": SceneEntityCfg(
                "contact_sensor", body_names=["H2", "Trunk", ".*_Left", ".*_Right"]
            ),
            "threshold": 1.0,
        },
    )

    # 5. Jump suppression: heavily penalize upward root velocity.
    root_vel_z_penalty = RewTerm(
        func=root_vel_z_l2_local,
        weight=-10.0,  # large negative weight
        params={"asset_cfg": SceneEntityCfg("robot")},
    )

    # 6. Airtime suppression: penalize every step all feet are off the ground.
    #    `threshold` is the vertical force (N) below which a foot counts as
    #    airborne (see feet_airtime_penalty_local), not a duration.
    # NOTE(review): this term uses sensor "feet_contact_sensor" while the
    # other terms use "contact_sensor" — confirm both exist in T1SceneCfg.
    feet_airtime = RewTerm(
        func=feet_airtime_penalty_local,
        weight=-10.0,
        params={
            "sensor_cfg": SceneEntityCfg("feet_contact_sensor"),
            "threshold": 0.1,
        },
    )

    # 7. Terminal success bonus: fires when the "standing_success"
    #    termination term triggers.
    is_success = RewTerm(
        func=lambda env, keys: env.termination_manager.get_term(keys),
        weight=500.0,
        params={"keys": "standing_success"},
    )
2026-03-16 05:00:20 -04:00
@configclass
class T1GetUpTerminationsCfg:
time_out = DoneTerm(func=mdp.time_out)
2026-03-19 09:08:57 -04:00
# 失败判定:躯干倾斜超过 45 度重置
2026-03-19 06:29:30 -04:00
base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 0.785})
2026-03-19 09:08:57 -04:00
# 成功判定:双高度 + 稳定
standing_success = DoneTerm(
2026-03-19 06:29:30 -04:00
func=is_standing_still,
params={
2026-03-19 09:08:57 -04:00
"min_head_height": 1.05,
"min_pelvis_height": 0.75,
"max_angle_error": 0.15,
"standing_time": 0.8,
"velocity_threshold": 0.15
}
2026-03-16 05:00:20 -04:00
)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Top-level configuration for the T1 get-up RL environment."""

    # Very large batch of parallel environments for a high-end GPU.
    scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)

    observations = T1ObservationCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1GetUpTerminationsCfg()
    events = T1EventCfg()
    actions = T1ActionCfg()

    episode_length_s = 6.0
    decimation = 4

    def __post_init__(self):
        super().__post_init__()
        # Spawn the robot slightly above the floor so it settles on reset.
        self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)