From 2e2d68a93366d5b2fe68dbb8f7078bb2d2753609 Mon Sep 17 00:00:00 2001 From: ChenXi Date: Sun, 22 Mar 2026 02:26:16 -0400 Subject: [PATCH] change the reward remove arm disturbance --- rl_game/get_up/config/ppo_cfg.yaml | 6 +++--- rl_game/get_up/config/t1_env_cfg.py | 22 ++++------------------ 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml index 5c1124f..ca98060 100644 --- a/rl_game/get_up/config/ppo_cfg.yaml +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -39,13 +39,13 @@ params: reward_shaper: scale_value: 1.0 normalize_advantage: True - gamma: 0.96 + gamma: 0.98 tau: 0.95 - learning_rate: 3e-4 + learning_rate: 5e-4 lr_schedule: adaptive kl_threshold: 0.008 score_to_win: 20000 - max_epochs: 200 + max_epochs: 500 save_best_after: 50 save_frequency: 100 grad_norm: 0.5 diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py index 8abe31b..d39a1ab 100644 --- a/rl_game/get_up/config/t1_env_cfg.py +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -53,7 +53,7 @@ def universal_arm_support_reward( env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, height_threshold: float = 0.60, - min_force: float = 2.0 + min_force: float = 15.0 ) -> torch.Tensor: """ 通用手臂支撑奖励:同时支持仰卧起坐支撑和俯卧撑起。 @@ -227,11 +227,11 @@ class T1GetUpRewardCfg: # 3. 手臂撑地奖:辅助脱离地面阶段 arm_push_support = RewTerm( func=universal_arm_support_reward, - weight=20.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键 + weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键 params={ "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]), "height_threshold": 0.65, # 躯干升到 0.6m 前都鼓励手臂用力 - "min_force": 3.0 # 只要有 15N 的力就触发 + "min_force": 8.0 # 只要有 8N 的力就触发 } ) @@ -251,24 +251,10 @@ class T1GetUpRewardCfg: # 6. 成功终极大奖 is_success = RewTerm( func=lambda env, keys: env.termination_manager.get_term(keys).float(), - weight=500.0, + weight=1000.0, params={"keys": "standing_success"} ) - # 7. 
手臂关节活跃度奖 (诱导摆动) - arm_movement_exploration = RewTerm( - func=mdp.joint_vel_l2, - weight=2, # 权重不要太高,防止变成“风扇” - params={"asset_cfg": SceneEntityCfg("robot",joint_names=["Left_Shoulder.*", "Left_Elbow.*", "Right_Shoulder.*", "Right_Elbow.*"])} - ) - - # 8. 手臂位置多样性奖 (离开默认折叠姿态) - arm_deviation_bonus = RewTerm( - func=joint_deviation_l2, - weight=1, - params={"asset_cfg": SceneEntityCfg("robot",joint_names=["Left_Shoulder.*", "Left_Elbow.*", "Right_Shoulder.*", "Right_Elbow.*"])} - ) - @configclass class T1GetUpTerminationsCfg: time_out = DoneTerm(func=mdp.time_out)