Amend some codes to init training for get up better

This commit is contained in:
2026-03-18 06:05:30 -04:00
parent 4933567ef8
commit 9f3ec9d67a
4 changed files with 95 additions and 31 deletions

View File

@@ -35,7 +35,7 @@ params:
normalize_input: True normalize_input: True
normalize_value: True normalize_value: True
value_bootstrap: True value_bootstrap: True
num_actors: 16384 # 同时训练的机器人数量 num_actors: 32768 # 同时训练的机器人数量
reward_shaper: reward_shaper:
scale_value: 1.0 scale_value: 1.0
normalize_advantage: True normalize_advantage: True
@@ -45,7 +45,7 @@ params:
lr_schedule: adaptive lr_schedule: adaptive
kl_threshold: 0.013 kl_threshold: 0.013
score_to_win: 20000 score_to_win: 20000
max_epochs: 5000 max_epochs: 500000
save_best_after: 50 save_best_after: 50
save_frequency: 100 save_frequency: 100
grad_norm: 1.0 grad_norm: 1.0

View File

@@ -27,11 +27,21 @@ def is_standing_still(
逻辑:高度达标且姿态垂直,持续时间超过 standing_time 则返回 True。 逻辑:高度达标且姿态垂直,持续时间超过 standing_time 则返回 True。
""" """
# 获取当前状态:高度 (Z轴) 和 投影重力 (前两个分量越小越垂直) # 获取当前状态:高度 (Z轴) 和 投影重力 (前两个分量越小越垂直)
current_height = env.scene["robot"].data.root_pos_w[:, 2]
# 1. 首先获取 H2 (头部) 的索引 (建议在环境初始化时获取一次,或者如下所示获取)
# find_bodies 返回 (indices, names)
head_idx, _ = env.scene["robot"].find_bodies("H2")
# 2. 修改后的位置获取逻辑
# data.body_link_pos_w 的维度是 (num_envs, num_bodies, 3)
# 我们取所有环境 (:),对应的头部索引 (head_idx[0]),以及 Z 轴坐标 (2)
current_head_height = env.scene["robot"].data.body_link_pos_w[:, head_idx[0], 2]
# 3. 姿态判定保持不变(通常依然以躯干 Trunk 的垂直度为准,因为头部可能会摆动)
gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1) gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
# 判断当前时刻是否“达标” # 4. 更新判断逻辑
is_stable_now = (current_height > minimum_height) & (gravity_error < max_angle_error) is_stable_now = (current_head_height > minimum_height) & (gravity_error < max_angle_error)
# 在 env.extras 中维护一个计时器 # 在 env.extras 中维护一个计时器
if "stable_timer" not in env.extras: if "stable_timer" not in env.extras:
@@ -72,6 +82,22 @@ def joint_vel_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> tor
vel = env.scene[asset_cfg.name].data.joint_vel vel = env.scene[asset_cfg.name].data.joint_vel
return torch.sum(torch.square(vel), dim=-1) return torch.sum(torch.square(vel), dim=-1)
def joint_pos_limits_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""惩罚关节接近或超过限位"""
# 获取关节位置 (num_envs, num_joints)
joint_pos = env.scene[asset_cfg.name].data.joint_pos
# 获取限位 (num_joints, 2) -> [lower, upper]
limits = env.scene[asset_cfg.name].data.soft_joint_pos_limits
lower_limits = limits[..., 0]
upper_limits = limits[..., 1]
# 计算超出限位的部分
out_of_lower = torch.clamp(lower_limits - joint_pos, min=0.0)
out_of_upper = torch.clamp(joint_pos - upper_limits, min=0.0)
# 返回超出量的平方和
return torch.sum(torch.square(out_of_lower + out_of_upper), dim=-1)
# --- 2. 配置类定义 --- # --- 2. 配置类定义 ---
## 1. 定义与你的类一致的关节列表 (按照 ROBOT_MOTORS 的顺序) ## 1. 定义与你的类一致的关节列表 (按照 ROBOT_MOTORS 的顺序)
@@ -131,12 +157,12 @@ class T1EventCfg:
params={ params={
"asset_cfg": SceneEntityCfg("robot"), "asset_cfg": SceneEntityCfg("robot"),
"pose_range": { "pose_range": {
"roll": (-1.57, 1.57), "roll": (0, 0),#(-1.57, 1.57),
"pitch": (-1.57, 1.57), "pitch": (1.57, 1.57),#(-1.57, 1.57),
"yaw": (-3.14, 3.14), "yaw": (0, 0),#(-3.14, 3.14),
"x": (0.0, 0.0), "x": (0.0, 0.0),
"y": (0.0, 0.0), "y": (0.0, 0.0),
"z": (0.0, 0.0), "z": (0.15, 0.15),
}, },
"velocity_range": {}, "velocity_range": {},
}, },
@@ -163,63 +189,94 @@ class T1GetUpRewardCfg:
# 相比 root_height_below_minimum这个函数会让机器人越接近目标高度得分越高且曲线平稳 # 相比 root_height_below_minimum这个函数会让机器人越接近目标高度得分越高且曲线平稳
height_tracking = RewTerm( height_tracking = RewTerm(
func=mdp.root_height_below_minimum, # 如果没有自定义函数,保留这个但调低权重 func=mdp.root_height_below_minimum, # 如果没有自定义函数,保留这个但调低权重
weight=5.0, # 降低权重,防止“弹射” weight=2.0, # 降低权重,防止“弹射”
params={"minimum_height": 0.65} params={
"minimum_height": 1.05,
"asset_cfg": SceneEntityCfg("robot", body_names="H2"),
}
) )
# 2. 姿态奖 (保持不变,这是核心) # 2. 姿态奖 (保持不变,这是核心)
upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0) upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)
# 3. 稳定性引导 (增加对速度的惩罚,抑制跳跃) # 3. 稳定性引导 (增加对速度的惩罚,抑制跳跃)
# 惩罚过大的垂直速度,防止“跳起” # 惩罚过大的垂直速度,防止“跳起”
root_vel_z_penalty = RewTerm( root_vel_z_penalty = RewTerm(
func=root_vel_z_l2_local, # 使用本地函数 func=root_vel_z_l2_local, # 使用本地函数
weight=-2.0, weight=-5,
params={"asset_cfg": SceneEntityCfg("robot")} # 传入资产配置 params={"asset_cfg": SceneEntityCfg("robot")} # 传入资产配置
) )
feet_contact = RewTerm(
func=mdp.contact_forces,
weight=0.5,
params={
"sensor_cfg": SceneEntityCfg("feet_contact_sensor"),
"threshold": 1.0
}
)
# 4. 关节与能量约束 (防止 NaN 和乱跳的关键) # 4. 关节与能量约束 (防止 NaN 和乱跳的关键)
joint_vel = RewTerm( joint_vel = RewTerm(
func=joint_vel_l2_local, func=joint_vel_l2_local,
weight=-0.005, weight=-1,
params={"asset_cfg": SceneEntityCfg("robot")} params={"asset_cfg": SceneEntityCfg("robot")}
) )
applied_torque = RewTerm( applied_torque = RewTerm(
func=joint_torques_l2_local, func=joint_torques_l2_local,
weight=-1.0e-5, weight=-1.0e-2,
params={"asset_cfg": SceneEntityCfg("robot")} params={"asset_cfg": SceneEntityCfg("robot")}
) )
# 5. 动作平滑 (非常重要) # 5. 动作平滑 (非常重要)
action_rate = RewTerm( action_rate = RewTerm(
func=mdp.action_rate_l2, func=mdp.action_rate_l2,
weight=-0.05 # 增大权重,强制动作连贯 weight=-1.0 # 增大权重,强制动作连贯
) )
# 6. 核心终点奖励 # 6. 软限位惩罚:防止关节撞击
joint_limits = RewTerm(
func=joint_pos_limits_l2_local,
weight=-10.0,
params = {"asset_cfg": SceneEntityCfg("robot")}
)
# 7. 时间惩罚 (强制效率)
# 每一帧都扣除固定分数,迫使机器人尽快达成 is_success 以停止扣分
time_penalty = RewTerm(
func=mdp.is_alive,
weight=-1.2
)
# 8. 核心终点奖励
is_success = RewTerm( is_success = RewTerm(
func=get_success_reward, func=get_success_reward,
weight=1000.0, # 成功奖励可以给高点,但前提是动作要平稳 weight=500.0, # 成功奖励可以给高点,但前提是动作要平稳
params={"term_keys": "standing_success"} params={"term_keys": "standing_success"}
) )
# 7. 生存奖励 (保持微小正值即可)
is_alive = RewTerm(func=mdp.is_alive, weight=0.1)
@configclass @configclass
class T1GetUpTerminationsCfg: class T1GetUpTerminationsCfg:
"""终止条件:站稳即算任务完成""" """终止条件:站稳即算任务完成,且包含强制超时重置"""
# 失败:跌倒
# --- 关键必须显式添加这一行episode_length_s 才会生效 ---
time_out = DoneTerm(func=mdp.time_out)
# 失败:跌倒 (Trunk 倾斜过大)
# limit_angle 是弧度1.0 约等于 57度如果想严格点可以调小
base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 1.0}) base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 1.0})
# 成功:满足高度和角度要求,且维持 1.0 # 成功:满足“头部高度”和“姿态要求,且维持 0.8
standing_success = DoneTerm( standing_success = DoneTerm(
func=is_standing_still, func=is_standing_still, # 确保你已经把这个函数里的 current_height 改成了 H2 的 Z 轴
params={ params={
"minimum_height": 0.63, # T1 头部 (H2) 站直高度约 1.15-1.2m,设为 1.1m 比较稳健
"max_angle_error": 0.15, "minimum_height": 1.05,
"standing_time": 1.0 # 姿态误差 (投影重力分量)0.15 约等于 8.6 度,要求很直
"max_angle_error": 1.0,
# 维持时间
"standing_time": 0.8
} }
) )
@@ -232,7 +289,7 @@ class T1EnvCfg(ManagerBasedRLEnvCfg):
def __post_init__(self): def __post_init__(self):
super().__post_init__() super().__post_init__()
# 初始高度设低,配合随机旋转事件实现“从地上爬起来” # 初始高度设低,配合随机旋转事件实现“从地上爬起来”
self.scene.robot.init_state.pos = (0.0, 0.0, 0.2) self.scene.robot.init_state.pos = (0.0, 0.0, 0.4)
observations = T1ObservationCfg() observations = T1ObservationCfg()
rewards = T1GetUpRewardCfg() rewards = T1GetUpRewardCfg()
@@ -240,5 +297,5 @@ class T1EnvCfg(ManagerBasedRLEnvCfg):
events = T1EventCfg() events = T1EventCfg()
actions = T1ActionCfg() actions = T1ActionCfg()
episode_length_s = 5.0 # 5秒强制重置 episode_length_s = 10.0 # 3秒强制重置
decimation = 4 # 控制频率 decimation = 4 # 控制频率

View File

@@ -1,5 +1,6 @@
from isaaclab.assets import ArticulationCfg, AssetBaseCfg from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.scene import InteractiveSceneCfg from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sensors import ContactSensorCfg
from isaaclab.utils import configclass from isaaclab.utils import configclass
from isaaclab.actuators import ImplicitActuatorCfg from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab import sim as sim_utils from isaaclab import sim as sim_utils
@@ -53,6 +54,12 @@ class T1SceneCfg(InteractiveSceneCfg):
}, },
) )
feet_contact_sensor = ContactSensorCfg(
prim_path="{ENV_REGEX_NS}/Robot/.*_foot_link", # 使用正则匹配所有脚部 link
update_period=0.0, # 随物理步长更新
history_length=3
)
# 3. 光照配置 # 3. 光照配置
light = AssetBaseCfg( light = AssetBaseCfg(
prim_path="/World/light", prim_path="/World/light",

View File

@@ -9,7 +9,7 @@ from isaaclab.app import AppLauncher
# 1. 配置启动参数 # 1. 配置启动参数
parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.") parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.")
parser.add_argument("--num_envs", type=int, default=16384, help="起身任务建议并行 4096 即可") parser.add_argument("--num_envs", type=int, default=32768, help="起身任务建议并行 4096 即可")
parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID") parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID")
parser.add_argument("--seed", type=int, default=42, help="随机种子") parser.add_argument("--seed", type=int, default=42, help="随机种子")
AppLauncher.add_app_launcher_args(parser) AppLauncher.add_app_launcher_args(parser)