diff --git a/rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd b/rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd new file mode 100644 index 0000000..65e3052 Binary files /dev/null and b/rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd differ diff --git a/rl_game/get_up/asset/t1/t1_locomotion_physics.usd b/rl_game/get_up/asset/t1/t1_locomotion_physics.usd deleted file mode 100644 index 654f8b7..0000000 Binary files a/rl_game/get_up/asset/t1/t1_locomotion_physics.usd and /dev/null differ diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index 5c28ead..18bc16b 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -41,11 +41,11 @@ params: normalize_advantage: True gamma: 0.96 tau: 0.95 - learning_rate: 5e-4 + learning_rate: 3e-4 lr_schedule: adaptive kl_threshold: 0.008 score_to_win: 20000 - max_epochs: 1000000 + max_epochs: 200 save_best_after: 50 save_frequency: 100 grad_norm: 0.5 diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index 097f8b5..6d63ecc 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -52,10 +52,9 @@ def standing_with_feet_reward( def arm_push_up_reward( env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, - height_threshold: float = 0.55, - min_force: float = 15.0 + height_threshold: float = 0.65, + min_force: float = 3.0 ) -> torch.Tensor: - """带几何限制的高标准手臂支撑奖励""" contact_sensor = env.scene.sensors.get(sensor_cfg.name) if contact_sensor is None: return torch.zeros(env.num_envs, device=env.device) @@ -84,10 +83,6 @@ def arm_push_up_reward( height_fade = torch.clamp((height_threshold - pelvis_pos_z) / 0.1, min=0.0, max=1.0) return total_reward * height_fade -def torso_pitch_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: - proj_gravity = env.scene[asset_cfg.name].data.projected_gravity_b - return torch.square(proj_gravity[:, 0]) - def is_standing_still( env: 
ManagerBasedRLEnv, min_head_height: float, @@ -123,12 +118,21 @@ def is_standing_still( return env.extras["stable_timer"] > standing_time +def joint_deviation_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: + """计算关节相对于默认姿态(default_joint_pos)的偏差平方和""" + # 获取当前关节位置相对于默认位置的差值 + # mdp.joint_pos_rel 返回的是 (current_pos - default_pos) + diff = mdp.joint_pos_rel(env, asset_cfg) + return torch.sum(torch.square(diff), dim=-1) + # --- 2. 配置类 --- T1_JOINT_NAMES = [ 'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll', 'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch', - 'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll' + 'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll', + 'AL1', 'AL2', 'AL3', + 'AR1', 'AR2', 'AR3' ] @@ -184,7 +188,7 @@ class T1ActionCfg: @configclass class T1GetUpRewardCfg: # 1. 姿态基础奖 (引导身体变正) - upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0) + upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0) # 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地 height_with_feet = RewTerm( @@ -205,8 +209,8 @@ class T1GetUpRewardCfg: weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键 params={ "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]), - "height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力 - "min_force": 10.0 # 只要有 15N 的力就触发 + "height_threshold": 0.65, # 躯干升到 0.65m 前都鼓励手臂用力 + "min_force": 3.0 # 只要有 3N 的力就触发 } ) @@ -217,13 +221,6 @@ class T1GetUpRewardCfg: params={"asset_cfg": SceneEntityCfg("robot")} ) - # 4. 新增:躯干仰角奖 (核心诱导) - torso_pitch = RewTerm( - func=torso_pitch_reward, # 或者使用 orientation 相关的项 - weight=15.0, - params={"asset_cfg": SceneEntityCfg("robot")} - ) - # 5. 时间惩罚 (强制效率) time_penalty = RewTerm( func=mdp.is_alive, @@ -237,6 +234,21 @@ class T1GetUpRewardCfg: params={"keys": "standing_success"} ) + # 7. 
手臂关节活跃度奖 (诱导摆动) + arm_movement_exploration = RewTerm( + func=mdp.joint_vel_l2, + weight=2, # 权重不要太高,防止变成“风扇” + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["AL3", "AR3"])} + ) + + # 8. 手臂位置多样性奖 (离开默认折叠姿态) + arm_deviation_bonus = RewTerm( + func=joint_deviation_l2, + weight=1, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=["AL3", "AR3"]) + } + ) @configclass class T1GetUpTerminationsCfg: diff --git a/rl_game/get_up/env/t1_env.py b/rl_game/get_up/env/t1_env.py index 8a96266..9dd1cf0 100644 --- a/rl_game/get_up/env/t1_env.py +++ b/rl_game/get_up/env/t1_env.py @@ -8,7 +8,7 @@ from isaaclab import sim as sim_utils import os _DEMO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) -T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd") +T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "T1_locomotion_physics_lab.usd") @configclass class T1SceneCfg(InteractiveSceneCfg):