change model
This commit is contained in:
BIN
rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd
Normal file
BIN
rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd
Normal file
Binary file not shown.
Binary file not shown.
@@ -41,11 +41,11 @@ params:
|
||||
normalize_advantage: True
|
||||
gamma: 0.96
|
||||
tau: 0.95
|
||||
learning_rate: 5e-4
|
||||
learning_rate: 3e-4
|
||||
lr_schedule: adaptive
|
||||
kl_threshold: 0.008
|
||||
score_to_win: 20000
|
||||
max_epochs: 1000000
|
||||
max_epochs: 200
|
||||
save_best_after: 50
|
||||
save_frequency: 100
|
||||
grad_norm: 0.5
|
||||
|
||||
@@ -52,10 +52,9 @@ def standing_with_feet_reward(
|
||||
def arm_push_up_reward(
|
||||
env: ManagerBasedRLEnv,
|
||||
sensor_cfg: SceneEntityCfg,
|
||||
height_threshold: float = 0.55,
|
||||
min_force: float = 15.0
|
||||
height_threshold: float = 0.65,
|
||||
min_force: float = 3.0
|
||||
) -> torch.Tensor:
|
||||
"""带几何限制的高标准手臂支撑奖励"""
|
||||
contact_sensor = env.scene.sensors.get(sensor_cfg.name)
|
||||
if contact_sensor is None:
|
||||
return torch.zeros(env.num_envs, device=env.device)
|
||||
@@ -84,10 +83,6 @@ def arm_push_up_reward(
|
||||
height_fade = torch.clamp((height_threshold - pelvis_pos_z) / 0.1, min=0.0, max=1.0)
|
||||
return total_reward * height_fade
|
||||
|
||||
def torso_pitch_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Square the first component of the asset's projected gravity.

    Args:
        env: Environment whose scene holds the asset named by ``asset_cfg``.
        asset_cfg: Scene entity config; only its ``name`` is used here.

    Returns:
        One value per row of ``projected_gravity_b``: column 0 squared.
    """
    # NOTE(review): presumably `projected_gravity_b` is gravity expressed in
    # the body frame, so column 0 grows as the torso pitches - confirm.
    asset = env.scene[asset_cfg.name]
    gravity_x = asset.data.projected_gravity_b[:, 0]
    return torch.square(gravity_x)
|
||||
|
||||
def is_standing_still(
|
||||
env: ManagerBasedRLEnv,
|
||||
min_head_height: float,
|
||||
@@ -123,12 +118,21 @@ def is_standing_still(
|
||||
|
||||
return env.extras["stable_timer"] > standing_time
|
||||
|
||||
def joint_deviation_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Return the sum of squared joint offsets from the default pose.

    Args:
        env: Environment passed through to ``mdp.joint_pos_rel``.
        asset_cfg: Scene entity config passed through to ``mdp.joint_pos_rel``
            to select the asset and joints.

    Returns:
        The squared offsets summed over the last dimension.
    """
    # mdp.joint_pos_rel returns (current_pos - default_pos), i.e. the offset
    # of each joint from its default position.
    offsets = mdp.joint_pos_rel(env, asset_cfg)
    squared_offsets = torch.square(offsets)
    return torch.sum(squared_offsets, dim=-1)
|
||||
|
||||
# --- 2. 配置类 ---
|
||||
|
||||
T1_JOINT_NAMES = [
|
||||
'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
|
||||
'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
|
||||
'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
|
||||
'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
|
||||
'AL1', 'AL2', 'AL3',
|
||||
'AR1', 'AR2', 'AR3'
|
||||
]
|
||||
|
||||
|
||||
@@ -184,7 +188,7 @@ class T1ActionCfg:
|
||||
@configclass
|
||||
class T1GetUpRewardCfg:
|
||||
# 1. 姿态基础奖 (引导身体变正)
|
||||
upright = RewTerm(func=mdp.flat_orientation_l2, weight=5.0)
|
||||
upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0)
|
||||
|
||||
# 2. 【条件高度奖】:双高度判定(头+盆骨),且必须脚踩地
|
||||
height_with_feet = RewTerm(
|
||||
@@ -205,8 +209,8 @@ class T1GetUpRewardCfg:
|
||||
weight=15.0, # 显著增加权重(从 3.0 提到 15.0),让它成为起步的关键
|
||||
params={
|
||||
"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
|
||||
"height_threshold": 0.6, # 躯干升到 0.6m 前都鼓励手臂用力
|
||||
"min_force": 10.0 # 只要有 15N 的力就触发
|
||||
"height_threshold": 0.65, # keep rewarding arm effort until the torso rises to 0.65 m
|
||||
"min_force": 3.0 # a contact force of only 3 N is enough to trigger the reward
|
||||
}
|
||||
)
|
||||
|
||||
@@ -217,13 +221,6 @@ class T1GetUpRewardCfg:
|
||||
params={"asset_cfg": SceneEntityCfg("robot")}
|
||||
)
|
||||
|
||||
# 4. 新增:躯干仰角奖 (核心诱导)
|
||||
torso_pitch = RewTerm(
|
||||
func=torso_pitch_reward, # 或者使用 orientation 相关的项
|
||||
weight=15.0,
|
||||
params={"asset_cfg": SceneEntityCfg("robot")}
|
||||
)
|
||||
|
||||
# 5. 时间惩罚 (强制效率)
|
||||
time_penalty = RewTerm(
|
||||
func=mdp.is_alive,
|
||||
@@ -237,6 +234,21 @@ class T1GetUpRewardCfg:
|
||||
params={"keys": "standing_success"}
|
||||
)
|
||||
|
||||
# 7. 手臂关节活跃度奖 (诱导摆动)
|
||||
arm_movement_exploration = RewTerm(
|
||||
func=mdp.joint_vel_l2,
|
||||
weight=2, # 权重不要太高,防止变成“风扇”
|
||||
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["AL3", "AR3"])}
|
||||
)
|
||||
|
||||
# 8. 手臂位置多样性奖 (离开默认折叠姿态)
|
||||
arm_deviation_bonus = RewTerm(
|
||||
func=joint_deviation_l2,
|
||||
weight=1,
|
||||
params={
|
||||
"asset_cfg": SceneEntityCfg("robot", joint_names=["AL3", "AR3"])
|
||||
}
|
||||
)
|
||||
|
||||
@configclass
|
||||
class T1GetUpTerminationsCfg:
|
||||
|
||||
2
rl_game/get_up/env/t1_env.py
vendored
2
rl_game/get_up/env/t1_env.py
vendored
@@ -8,7 +8,7 @@ from isaaclab import sim as sim_utils
|
||||
import os
|
||||
|
||||
_DEMO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd")
|
||||
T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "T1_locomotion_physics_lab.usd")
|
||||
|
||||
@configclass
|
||||
class T1SceneCfg(InteractiveSceneCfg):
|
||||
|
||||
Reference in New Issue
Block a user