# Gym_GPU/rl_game/get_up/config/t1_env_cfg.py
# T1 humanoid "get up from the ground" task configuration (IsaacLab manager-based RL).
import random

import numpy as np
import torch

import isaaclab.envs.mdp as mdp
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnv, ManagerBasedRLEnvCfg
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.utils import configclass

from rl_game.get_up.env.t1_env import T1SceneCfg
# --- 1. Custom MDP logic functions ---
def standing_with_feet_reward(
    env: ManagerBasedRLEnv,
    min_head_height: float,
    min_pelvis_height: float,
    sensor_cfg: SceneEntityCfg,
    force_threshold: float = 20.0,
    max_v_z: float = 0.5,
) -> torch.Tensor:
    """Reward standing tall with both feet firmly loaded.

    Combines three signals:
      * normalized head/pelvis height scores (each clamped to [0, 1.2]),
      * a sigmoid gate on the summed vertical foot contact force, and
      * an exponential penalty on vertical root speed (discourages jumping).

    Args:
        env: The running manager-based RL environment.
        min_head_height: Head height (m) at which the head score reaches 1.
        min_pelvis_height: Pelvis height (m) at which the pelvis score reaches 1.
        sensor_cfg: Contact-sensor entity config whose sensor reports foot forces.
        force_threshold: Vertical force (N) around which the contact gate opens.
        max_v_z: Vertical-speed scale (m/s) of the velocity penalty.

    Returns:
        Per-env reward tensor of shape (num_envs,).
    """
    robot = env.scene["robot"]
    # Body lookups for head ("H2") and pelvis ("Trunk").
    head_idx, _ = robot.find_bodies("H2")
    pelvis_idx, _ = robot.find_bodies("Trunk")

    curr_head_h = robot.data.body_state_w[:, head_idx[0], 2]
    curr_pelvis_h = robot.data.body_state_w[:, pelvis_idx[0], 2]

    # Normalized height scores; allow a small overshoot (up to 1.2x target).
    head_score = torch.clamp(curr_head_h / min_head_height, 0.0, 1.2)
    pelvis_score = torch.clamp(curr_pelvis_h / min_pelvis_height, 0.0, 1.2)
    height_reward = (head_score + pelvis_score) / 2.0

    # Foot loading: soft gate approaching 1 once the summed vertical force
    # exceeds `force_threshold` (prevents rewarding unloaded "toe-tip" stands).
    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
    if contact_sensor is None:
        return torch.zeros(env.num_envs, device=env.device)
    foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
    force_weight = torch.sigmoid((foot_forces_z - force_threshold) / 5.0)

    # Vertical-velocity penalty: 1 when still, decays as |v_z| grows.
    root_vel_z = robot.data.root_lin_vel_w[:, 2]
    vel_penalty = torch.exp(-torch.abs(root_vel_z) / max_v_z)

    # Height always pays at least 50%; the remaining 50% requires loaded feet
    # AND low vertical speed.
    return height_reward * (0.5 + 0.5 * force_weight * vel_penalty)
def dynamic_getup_strategy_reward(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Posture-agnostic get-up strategy reward.

    Shaping chain (works from both supine and prone starts):
      1. Crouch loading: whenever the pelvis is low, force the legs to fold.
      2. Supine support: when on the back, reward pushing the pelvis up.
      3. Flip assist: when prone, a small incentive to rotate onto the back.
      4. Explosive jump: crouched + upward velocity + hand support pays the
         largest multiplicative bonus.

    Returns:
        Per-env reward tensor of shape (num_envs,).
    """
    # --- 1. Physical state ---
    robot = env.scene["robot"]
    gravity_z = robot.data.projected_gravity_b[:, 2]  # +1: supine, -1: prone
    pelvis_idx, _ = robot.find_bodies("Trunk")
    curr_pelvis_h = robot.data.body_state_w[:, pelvis_idx[0], 2]
    root_vel_z = robot.data.root_lin_vel_w[:, 2]

    # Joint indices: 11,12 hip pitch; 17,18 knee pitch.
    # NOTE(review): hard-coded indices — confirm they match the T1 joint order.
    knee_joints = [17, 18]
    hip_pitch_joints = [11, 12]
    joint_pos = robot.data.joint_pos

    # --- 2. Crouch score ---
    # Folding the legs (feet near the CoM) is the precondition for standing up
    # regardless of orientation. Targets of 1.5 rad (knee) / 1.2 rad (hip)
    # encourage a deep fold.
    knee_flex_err = torch.abs(joint_pos[:, knee_joints] - 1.5).sum(dim=-1)
    hip_flex_err = torch.abs(joint_pos[:, hip_pitch_joints] - 1.2).sum(dim=-1)
    crouch_score = torch.exp(-(knee_flex_err + hip_flex_err) * 0.6)

    # Base crouch reward: only active while the pelvis is below 0.6 m.
    crouch_trigger = torch.clamp(0.6 - curr_pelvis_h, min=0.0)
    base_crouch_reward = crouch_trigger * crouch_score * 40.0

    # --- 3. Support force (hands/arms pushing on the ground) ---
    push_reward = torch.zeros_like(curr_pelvis_h)
    contact_sensor = env.scene.sensors.get("contact_sensor")
    if contact_sensor is not None:
        # Any monitored body providing upward push force counts as support.
        arm_forces_z = contact_sensor.data.net_forces_w[:, :, 2]
        push_reward = torch.tanh(torch.max(arm_forces_z, dim=-1)[0] / 30.0)

    # --- 4. Orientation-specific guidance ---
    is_back = torch.clamp(gravity_z, min=0.0)    # degree of lying supine
    is_belly = torch.clamp(-gravity_z, min=0.0)  # degree of lying prone

    # A. Supine: reward raising the pelvis while crouched
    #    (tuck legs -> push ground -> drive hips up).
    back_lift_reward = is_back * torch.clamp(curr_pelvis_h - 0.15, min=0.0) * crouch_score * 50.0

    # B. Prone: small flip incentive toward the back (low weight, a fallback
    #    path rather than the only one).
    # FIX: this term was gated on `is_back`, which is zero whenever the robot
    # is prone, so the "flip assist" could never fire and `is_belly` was
    # computed but unused. Gate on `is_belly` and reward gravity_z rising
    # away from the fully-prone value of -1.
    flip_reward = is_belly * (1.0 + gravity_z) * 5.0

    # --- 5. Explosive jump ---
    # crouch tightness * upward speed * support gate -> the big payoff for
    # "fold tight, push hard, rise fast", regardless of starting orientation.
    explosion_reward = crouch_score * torch.clamp(root_vel_z, min=0.0) * (0.5 + 0.5 * push_reward) * 80.0

    # --- 6. Total ---
    return base_crouch_reward + back_lift_reward + flip_reward + explosion_reward
def is_standing_still(
    env: ManagerBasedRLEnv,
    min_head_height: float,
    min_pelvis_height: float,
    max_angle_error: float,
    standing_time: float,
    velocity_threshold: float = 0.15,
) -> torch.Tensor:
    """Return a bool tensor: True where the env has stood stably for `standing_time` s.

    "Stable" means head and pelvis above their thresholds, near-vertical
    orientation (small xy norm of projected gravity) and low root speed.
    A per-env timer accumulates while stable and resets to zero otherwise.

    FIX: the timer is keyed by this term's parameters. This function is used
    both as a reward term and as a termination term with different
    thresholds; a single shared "stable_timer" key was updated twice per
    step (double-counting dt) under conflicting stability criteria.
    """
    robot = env.scene["robot"]
    head_idx, _ = robot.find_bodies("H2")
    pelvis_idx, _ = robot.find_bodies("Trunk")

    current_head_h = robot.data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = robot.data.body_state_w[:, pelvis_idx[0], 2]

    gravity_error = torch.norm(robot.data.projected_gravity_b[:, :2], dim=-1)
    root_vel_norm = torch.norm(robot.data.root_lin_vel_w, dim=-1)

    is_stable_now = (
        (current_head_h > min_head_height)
        & (current_pelvis_h > min_pelvis_height)
        & (gravity_error < max_angle_error)
        & (root_vel_norm < velocity_threshold)
    )

    # One timer per distinct parameterization of this term.
    timer_key = (
        f"stable_timer_{min_head_height}_{min_pelvis_height}"
        f"_{max_angle_error}_{standing_time}_{velocity_threshold}"
    )
    if timer_key not in env.extras:
        env.extras[timer_key] = torch.zeros(env.num_envs, device=env.device)
    # Wall-clock step duration of one policy step.
    dt = env.physics_dt * env.cfg.decimation
    env.extras[timer_key] = torch.where(
        is_stable_now,
        env.extras[timer_key] + dt,
        torch.zeros_like(env.extras[timer_key]),
    )
    return env.extras[timer_key] > standing_time
# --- 2. Configuration classes ---
# Full T1 joint ordering: head (2), arms (8), waist (1), legs (12) = 23 joints.
T1_JOINT_NAMES = [
    'AAHead_yaw', 'Head_pitch',
    'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
    'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
    'Waist',
    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
]
@configclass
class T1ObservationCfg:
    """Observation space: proprioception + root state + previous action."""

    @configclass
    class PolicyCfg(ObsGroup):
        # Concatenate all terms into one flat policy observation vector.
        concatenate_terms = True
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        # World-frame root position (height feedback matters for getting up).
        root_pos = ObsTerm(func=mdp.root_pos_w)
        joint_pos = ObsTerm(
            func=mdp.joint_pos_rel,
            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)},
        )
        joint_vel = ObsTerm(
            func=mdp.joint_vel_rel,
            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)},
        )
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()
@configclass
class T1EventCfg:
    """Reset events: drop the robot lying on the ground with randomized pose."""

    # NOTE(review): the pitch sign is sampled ONCE at import time, so every
    # episode in a given process shares the same lying orientation (face-up
    # or face-down); per-reset randomization would need a custom reset
    # function. FIX: the interval is sorted so a negative sign no longer
    # produces an inverted (high, low) range for the uniform sampler.
    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        params={
            "asset_cfg": SceneEntityCfg("robot"),
            "pose_range": {
                "roll": (-1.57, 1.57),
                "pitch": tuple(sorted(np.array([1.4, 1.6], dtype=np.float32) * random.choice([-1, 1]))),
                "yaw": (-3.14, 3.14),
                "x": (0.0, 0.0),
                "y": (0.0, 0.0),
                # Spawn slightly above ground so the robot settles onto it.
                "z": (0.3, 0.4),
            },
            "velocity_range": {},
        },
        mode="reset",
    )
@configclass
class T1ActionCfg:
    """Joint-position actions split by body region.

    Splitting lets each region use its own exploration scale instead of one
    shared scale that tends to produce whole-body jitter.
    """

    # Arms get the largest scale: they must discover pushing strategies.
    arm_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=[
            'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
            'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
        ],
        scale=1.2,
        use_default_offset=True,
    )
    torso_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=['Waist', 'AAHead_yaw', 'Head_pitch'],
        scale=0.8,
        use_default_offset=True,
    )
    # Legs get the smallest scale for stability.
    leg_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=[
            'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
            'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
            'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll',
        ],
        scale=0.6,
        use_default_offset=True,
    )
@configclass
class T1GetUpRewardCfg:
    """Reward terms for the T1 get-up task."""

    # 1. Staged guidance: flip -> crouch -> push up.
    dynamic_strategy = RewTerm(
        func=dynamic_getup_strategy_reward,
        weight=1.5,
    )
    # 2. Standing quality, gated on loaded feet. Large weight.
    height_with_feet = RewTerm(
        func=standing_with_feet_reward,
        weight=40.0,
        params={
            "min_head_height": 1.1,
            "min_pelvis_height": 0.7,
            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
            # Require real foot pressure to block "toe-tip" exploits.
            "force_threshold": 40.0,
            "max_v_z": 0.2,
        },
    )
    # 3. Exploit prevention: heavy penalty when the trunk or head touches
    #    the ground.
    body_contact_penalty = RewTerm(
        func=mdp.contact_forces,
        weight=-20.0,
        params={
            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk", "H2"]),
            "threshold": 1.0,
        },
    )
    # 4. Action-rate penalty (suppresses high-frequency jitter).
    action_rate = RewTerm(
        func=mdp.action_rate_l2,
        weight=-0.01,
    )
    # 5. Success-maintenance bonus: very large reward for holding a stable
    #    stand for 0.4 s.
    is_success_maintain = RewTerm(
        func=is_standing_still,
        weight=1000.0,
        params={
            "min_head_height": 1.08,
            "min_pelvis_height": 0.72,
            "max_angle_error": 0.2,
            "standing_time": 0.4,
            "velocity_threshold": 0.3,
        },
    )
@configclass
class T1GetUpTerminationsCfg:
    """Episode termination: timeout, or a sustained successful stand."""

    time_out = DoneTerm(func=mdp.time_out)
    # Slightly looser thresholds than the maintenance reward so the episode
    # ends shortly after success has been rewarded.
    standing_success = DoneTerm(
        func=is_standing_still,
        params={
            "min_head_height": 1.05,
            "min_pelvis_height": 0.75,
            "max_angle_error": 0.3,
            "standing_time": 0.2,
            "velocity_threshold": 0.5,
        },
    )
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Top-level environment configuration for the T1 get-up task."""

    scene = T1SceneCfg(num_envs=8192, env_spacing=2.5)
    observations = T1ObservationCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1GetUpTerminationsCfg()
    events = T1EventCfg()
    actions = T1ActionCfg()
    # 10 s episodes; the policy acts once every 4 physics steps.
    episode_length_s = 10.0
    decimation = 4