diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index 8ae94f7..c1a801d 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -35,7 +35,7 @@ params: normalize_input: True normalize_value: True value_bootstrap: True - num_actors: 16384 # 同时训练的机器人数量 + num_actors: 32768 # 同时训练的机器人数量 reward_shaper: scale_value: 1.0 normalize_advantage: True @@ -45,7 +45,7 @@ params: lr_schedule: adaptive kl_threshold: 0.013 score_to_win: 20000 - max_epochs: 5000 + max_epochs: 500000 save_best_after: 50 save_frequency: 100 grad_norm: 1.0 diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index 3c02b42..1950012 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -27,11 +27,21 @@ def is_standing_still( 逻辑:高度达标且姿态垂直,持续时间超过 standing_time 则返回 True。 """ # 获取当前状态:高度 (Z轴) 和 投影重力 (前两个分量越小越垂直) - current_height = env.scene["robot"].data.root_pos_w[:, 2] + + # 1. 首先获取 H2 (头部) 的索引 (建议在环境初始化时获取一次,或者如下所示获取) + # find_bodies 返回 (indices, names) + head_idx, _ = env.scene["robot"].find_bodies("H2") + + # 2. 修改后的位置获取逻辑 + # data.body_link_pos_w 的维度是 (num_envs, num_bodies, 3) + # 我们取所有环境 (:),对应的头部索引 (head_idx[0]),以及 Z 轴坐标 (2) + current_head_height = env.scene["robot"].data.body_link_pos_w[:, head_idx[0], 2] + + # 3. 姿态判定保持不变(通常依然以躯干 Trunk 的垂直度为准,因为头部可能会摆动) gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1) - # 判断当前时刻是否“达标” - is_stable_now = (current_height > minimum_height) & (gravity_error < max_angle_error) + # 4. 更新判断逻辑 + is_stable_now = (current_head_height > minimum_height) & (gravity_error < max_angle_error) # 在 env.extras 中维护一个计时器 if "stable_timer" not in env.extras: @@ -72,6 +82,22 @@ def joint_vel_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> tor vel = env.scene[asset_cfg.name].data.joint_vel return torch.sum(torch.square(vel), dim=-1) +def joint_pos_limits_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """惩罚关节接近或超过限位""" + # 获取关节位置 (num_envs, num_joints) + joint_pos = env.scene[asset_cfg.name].data.joint_pos + # 获取限位 (num_joints, 2) -> [lower, upper] + limits = env.scene[asset_cfg.name].data.soft_joint_pos_limits + lower_limits = limits[..., 0] + upper_limits = limits[..., 1] + + # 计算超出限位的部分 + out_of_lower = torch.clamp(lower_limits - joint_pos, min=0.0) + out_of_upper = torch.clamp(joint_pos - upper_limits, min=0.0) + + # 返回超出量的平方和 + return torch.sum(torch.square(out_of_lower + out_of_upper), dim=-1) + # --- 2. 配置类定义 --- ## 1. 定义与你的类一致的关节列表 (按照 ROBOT_MOTORS 的顺序) @@ -131,12 +157,12 @@ class T1EventCfg: params={ "asset_cfg": SceneEntityCfg("robot"), "pose_range": { - "roll": (-1.57, 1.57), - "pitch": (-1.57, 1.57), - "yaw": (-3.14, 3.14), + "roll": (0, 0),#(-1.57, 1.57), + "pitch": (1.57, 1.57),#(-1.57, 1.57), + "yaw": (0, 0),#(-3.14, 3.14), "x": (0.0, 0.0), "y": (0.0, 0.0), - "z": (0.0, 0.0), + "z": (0.15, 0.15), }, "velocity_range": {}, }, @@ -163,63 +189,94 @@ class T1GetUpRewardCfg: # 相比 root_height_below_minimum,这个函数会让机器人越接近目标高度得分越高,且曲线平稳 height_tracking = RewTerm( func=mdp.root_height_below_minimum, # 如果没有自定义函数,保留这个但调低权重 - weight=5.0, # 降低权重,防止“弹射” - params={"minimum_height": 0.65} + weight=2.0, # 降低权重,防止“弹射” + params={ + "minimum_height": 1.05, + "asset_cfg": SceneEntityCfg("robot", body_names="H2"), + } ) # 2. 姿态奖 (保持不变,这是核心) - upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0) + upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0) # 3. 稳定性引导 (增加对速度的惩罚,抑制跳跃) # 惩罚过大的垂直速度,防止“跳起” root_vel_z_penalty = RewTerm( func=root_vel_z_l2_local, # 使用本地函数 - weight=-2.0, + weight=-5, params={"asset_cfg": SceneEntityCfg("robot")} # 传入资产配置 ) + feet_contact = RewTerm( + func=mdp.contact_forces, + weight=0.5, + params={ + "sensor_cfg": SceneEntityCfg("feet_contact_sensor"), + "threshold": 1.0 + } + ) + # 4. 关节与能量约束 (防止 NaN 和乱跳的关键) joint_vel = RewTerm( func=joint_vel_l2_local, - weight=-0.005, + weight=-1, params={"asset_cfg": SceneEntityCfg("robot")} ) applied_torque = RewTerm( func=joint_torques_l2_local, - weight=-1.0e-5, + weight=-1.0e-2, params={"asset_cfg": SceneEntityCfg("robot")} ) # 5. 动作平滑 (非常重要) action_rate = RewTerm( func=mdp.action_rate_l2, - weight=-0.05 # 增大权重,强制动作连贯 + weight=-1.0 # 增大权重,强制动作连贯 ) - # 6. 核心终点奖励 + # 6. 软限位惩罚:防止关节撞击 + joint_limits = RewTerm( + func=joint_pos_limits_l2_local, + weight=-10.0, + params = {"asset_cfg": SceneEntityCfg("robot")} + ) + + # 7. 时间惩罚 (强制效率) + # 每一帧都扣除固定分数,迫使机器人尽快达成 is_success 以停止扣分 + time_penalty = RewTerm( + func=mdp.is_alive, + weight=-1.2 + ) + + # 8. 核心终点奖励 is_success = RewTerm( func=get_success_reward, - weight=1000.0, # 成功奖励可以给高点,但前提是动作要平稳 + weight=500.0, # 成功奖励可以给高点,但前提是动作要平稳 params={"term_keys": "standing_success"} ) - # 7. 生存奖励 (保持微小正值即可) - is_alive = RewTerm(func=mdp.is_alive, weight=0.1) - @configclass class T1GetUpTerminationsCfg: - """终止条件:站稳即算任务完成""" - # 失败:跌倒 + """终止条件:站稳即算任务完成,且包含强制超时重置""" + + # --- 关键:必须显式添加这一行,episode_length_s 才会生效 --- + time_out = DoneTerm(func=mdp.time_out) + + # 失败:跌倒 (Trunk 倾斜过大) + # limit_angle 是弧度,1.0 约等于 57度,如果想严格点可以调小 base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 1.0}) - # 成功:满足高度和角度要求,且维持 1.0 秒 + # 成功:满足“头部高度”和“姿态要求”,且维持 0.8 秒 standing_success = DoneTerm( - func=is_standing_still, + func=is_standing_still, # 确保你已经把这个函数里的 current_height 改成了 H2 的 Z 轴 params={ - "minimum_height": 0.63, - "max_angle_error": 0.15, - "standing_time": 1.0 + # T1 头部 (H2) 站直高度约 1.15-1.2m,设为 1.1m 比较稳健 + "minimum_height": 1.05, + # 姿态误差 (投影重力分量),0.15 约等于 8.6 度,要求很直 + "max_angle_error": 1.0, + # 维持时间 + "standing_time": 0.8 } ) @@ -232,7 +289,7 @@ class T1EnvCfg(ManagerBasedRLEnvCfg): def __post_init__(self): super().__post_init__() # 初始高度设低,配合随机旋转事件实现“从地上爬起来” - self.scene.robot.init_state.pos = (0.0, 0.0, 0.2) + self.scene.robot.init_state.pos = (0.0, 0.0, 0.4) observations = T1ObservationCfg() rewards = T1GetUpRewardCfg() @@ -240,5 +297,5 @@ class T1EnvCfg(ManagerBasedRLEnvCfg): events = T1EventCfg() actions = T1ActionCfg() - episode_length_s = 5.0 # 5秒强制重置 + episode_length_s = 10.0 # 3秒强制重置 decimation = 4 # 控制频率 \ No newline at end of file diff --git a/rl_game/get_up/env/t1_env.py b/rl_game/get_up/env/t1_env.py index 1cfda5d..c53c3b1 100644 --- a/rl_game/get_up/env/t1_env.py +++ b/rl_game/get_up/env/t1_env.py @@ -1,5 +1,6 @@ from isaaclab.assets import ArticulationCfg, AssetBaseCfg from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import ContactSensorCfg from isaaclab.utils import configclass from isaaclab.actuators import ImplicitActuatorCfg from isaaclab import sim as sim_utils @@ -53,6 +54,12 @@ class T1SceneCfg(InteractiveSceneCfg): }, ) + feet_contact_sensor = ContactSensorCfg( + prim_path="{ENV_REGEX_NS}/Robot/.*_foot_link", # 使用正则匹配所有脚部 link + update_period=0.0, # 随物理步长更新 + history_length=3 + ) + # 3. 光照配置 light = AssetBaseCfg( prim_path="/World/light", diff --git a/rl_game/get_up/train.py b/rl_game/get_up/train.py index ddda5fe..52871ee 100644 --- a/rl_game/get_up/train.py +++ b/rl_game/get_up/train.py @@ -9,7 +9,7 @@ from isaaclab.app import AppLauncher # 1. 配置启动参数 parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.") -parser.add_argument("--num_envs", type=int, default=16384, help="起身任务建议并行 4096 即可") +parser.add_argument("--num_envs", type=int, default=32768, help="起身任务建议并行 4096 即可") parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID") parser.add_argument("--seed", type=int, default=42, help="随机种子") AppLauncher.add_app_launcher_args(parser)