2026-03-20 07:03:41 -04:00
|
|
|
|
import random
|
|
|
|
|
|
|
2026-03-20 08:00:51 -04:00
|
|
|
|
import numpy
|
2026-03-20 08:12:08 -04:00
|
|
|
|
import numpy as np
|
2026-03-16 09:23:22 -04:00
|
|
|
|
import torch
|
2026-03-16 05:00:20 -04:00
|
|
|
|
from isaaclab.assets import ArticulationCfg
|
2026-03-16 09:23:22 -04:00
|
|
|
|
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
|
2026-03-16 05:00:20 -04:00
|
|
|
|
from isaaclab.managers import ObservationGroupCfg as ObsGroup
|
|
|
|
|
|
from isaaclab.managers import ObservationTermCfg as ObsTerm
|
|
|
|
|
|
from isaaclab.managers import RewardTermCfg as RewTerm
|
|
|
|
|
|
from isaaclab.managers import TerminationTermCfg as DoneTerm
|
|
|
|
|
|
from isaaclab.managers import EventTermCfg as EventTerm
|
|
|
|
|
|
from isaaclab.envs.mdp import JointPositionActionCfg
|
|
|
|
|
|
from isaaclab.managers import SceneEntityCfg
|
|
|
|
|
|
from isaaclab.utils import configclass
|
|
|
|
|
|
from rl_game.get_up.env.t1_env import T1SceneCfg
|
|
|
|
|
|
import isaaclab.envs.mdp as mdp
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-19 09:08:57 -04:00
|
|
|
|
# --- 1. Custom MDP logic functions ---
|
2026-03-16 09:23:22 -04:00
|
|
|
|
|
2026-03-19 09:08:57 -04:00
|
|
|
|
def standing_with_feet_reward(
    env: ManagerBasedRLEnv,
    min_head_height: float,
    min_pelvis_height: float,
    sensor_cfg: SceneEntityCfg,
    force_threshold: float = 30.0,
    max_v_z: float = 0.25
) -> torch.Tensor:
    """Phase-switched height reward.

    Low pelvis height  -> pure height shaping, to coax the robot upward.
    High pelvis height -> height * foot-contact gate * vertical-velocity damping.

    Args:
        env: The running environment.
        min_head_height: Head height (m) at which the head score saturates at 1.
        min_pelvis_height: Pelvis height (m) at which the pelvis score saturates at 1.
        sensor_cfg: Contact sensor covering the foot bodies.
        force_threshold: Minimum summed vertical foot force (N) to count as grounded.
        max_v_z: Vertical speed (m/s) tolerated before the velocity damping kicks in.

    Returns:
        Per-environment reward tensor of shape (num_envs,).
    """
    # 1. Basic kinematic state.
    # NOTE(review): find_bodies() is called every step although the indices are
    # constant; cheap enough to leave, but could be cached.
    head_idx, _ = env.scene["robot"].find_bodies("H2")
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]

    # 2. Base height scores in [0, 1].
    head_score = torch.clamp(current_head_h / min_head_height, max=1.0)
    pelvis_score = torch.clamp(current_pelvis_h / min_pelvis_height, max=1.0)
    combined_height_score = (head_score + pelvis_score) / 2.0

    # 3. Foot-contact gate.
    # Fix: guard against a missing sensor (sensors.get returns None) for
    # consistency with arm_push_up_reward; previously this raised AttributeError.
    contact_sensor = env.scene.sensors.get(sensor_cfg.name)
    if contact_sensor is None:
        return torch.zeros(env.num_envs, device=env.device)
    foot_forces_z = torch.sum(contact_sensor.data.net_forces_w[:, :, 2], dim=-1)
    is_feet_on_ground = (foot_forces_z > force_threshold).float()

    # 4. Velocity damping (suppresses jumping): factor is 1.0 while |v_z| is
    # below max_v_z and decays exponentially beyond it.
    root_vel_z = env.scene["robot"].data.root_lin_vel_w[:, 2]
    vel_penalty_factor = torch.exp(-4.0 * torch.clamp(torch.abs(root_vel_z) - max_v_z, min=0.0))

    # --- Core phase switch ---
    # Transition height for the pelvis (m).
    transition_h = 0.4

    # Low phase: pure height reward, to induce upward motion.
    low_height_reward = combined_height_score

    # High phase: combined reward (height * velocity damping * must touch ground).
    high_height_reward = combined_height_score * vel_penalty_factor * is_feet_on_ground

    return torch.where(current_pelvis_h < transition_h, low_height_reward, high_height_reward)
|
2026-03-16 09:23:22 -04:00
|
|
|
|
|
2026-03-20 05:20:17 -04:00
|
|
|
|
|
2026-03-19 09:08:57 -04:00
|
|
|
|
def arm_push_up_reward(
    env: ManagerBasedRLEnv,
    sensor_cfg: SceneEntityCfg,
    height_threshold: float = 0.5,
    min_force: float = 20.0
) -> torch.Tensor:
    """Reinforced arm push-up reward.

    1. Encourages the arms to exert a vertical reaction force above ``min_force``.
    2. Adds a bonus when the arms are loaded while the trunk is moving upward.

    The reward is only paid out while the trunk is still below
    ``height_threshold`` (i.e. during the push-up phase).
    """
    # Fetch the arm contact sensor; a missing sensor yields zeros instead of crashing.
    sensor = env.scene.sensors.get(sensor_cfg.name)
    if sensor is None:
        return torch.zeros(env.num_envs, device=env.device)

    # 1. Strongest vertical (Z) reaction force among the monitored arm bodies.
    vertical_forces = sensor.data.net_forces_w[:, :, 2]
    peak_force = torch.max(vertical_forces, dim=-1)[0]

    # Normalised force reward: ramps linearly over an 80 N band above min_force.
    base_reward = torch.clamp((peak_force - min_force) / 80.0, min=0.0, max=1.0)

    # 2. Trunk height and vertical velocity.
    trunk_idx, _ = env.scene["robot"].find_bodies("Trunk")
    trunk_height = env.scene["robot"].data.body_state_w[:, trunk_idx[0], 2]
    upward_speed = env.scene["robot"].data.root_lin_vel_w[:, 2]

    # 3. Synergy bonus: arms pushing hard while the trunk rises earns extra
    # reward, scaled with the upward speed (faster rise -> bigger bonus).
    is_pushing_up = (peak_force > min_force) & (upward_speed > 0.05)
    bonus = torch.where(
        is_pushing_up,
        base_reward * (1.0 + upward_speed * 2.0),
        base_reward
    )

    # Pay the reward only while the trunk is still low (push-up phase).
    gated = torch.where(trunk_height < height_threshold,
                        bonus,
                        torch.zeros_like(bonus))
    return gated
|
2026-03-19 06:29:30 -04:00
|
|
|
|
|
2026-03-19 09:08:57 -04:00
|
|
|
|
def is_standing_still(
    env: ManagerBasedRLEnv,
    min_head_height: float,
    min_pelvis_height: float,
    max_angle_error: float,
    standing_time: float,
    velocity_threshold: float = 0.15
) -> torch.Tensor:
    """Success check: both heights reached + trunk vertical + whole body at rest.

    Returns a boolean tensor that is True once an environment has been
    continuously stable for longer than ``standing_time`` seconds.

    NOTE(review): the per-env timer lives in ``env.extras["stable_timer"]`` and
    is only zeroed when instability is observed — it is not reset on episode
    reset, so a stale timer could carry over across resets. Confirm this is
    acceptable for the termination logic.
    """
    head_idx, _ = env.scene["robot"].find_bodies("H2")
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")

    # World-frame Z heights of head and pelvis bodies.
    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]

    # XY magnitude of gravity projected into the base frame: 0 when perfectly
    # upright, grows with tilt. Root speed norm gates "at rest".
    gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
    root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)

    # Stability condition: head high enough AND pelvis high enough AND small
    # tilt AND low velocity.
    is_stable_now = (
        (current_head_h > min_head_height) &
        (current_pelvis_h > min_pelvis_height) &
        (gravity_error < max_angle_error) &
        (root_vel_norm < velocity_threshold)
    )

    # Lazily create the per-env stability timer on first use.
    if "stable_timer" not in env.extras:
        env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device)

    # One control step of wall-clock sim time; accumulate while stable,
    # hard-reset the timer the moment any condition fails.
    dt = env.physics_dt * env.cfg.decimation
    env.extras["stable_timer"] = torch.where(is_stable_now, env.extras["stable_timer"] + dt,
                                             torch.zeros_like(env.extras["stable_timer"]))

    return env.extras["stable_timer"] > standing_time
|
|
|
|
|
|
|
2026-03-19 09:25:20 -04:00
|
|
|
|
def feet_airtime_penalty_local(
    env: ManagerBasedRLEnv,
    sensor_cfg: SceneEntityCfg,
    threshold: float = 1.0
) -> torch.Tensor:
    """Custom airtime-penalty signal.

    A foot counts as airborne when its vertical contact force is below
    ``threshold``. The returned tensor is 1.0 for environments where *every*
    monitored foot is airborne (fully in the air), 0.0 otherwise.
    """
    # Look up the sensor object; a missing sensor yields an all-zero signal
    # instead of crashing the program.
    sensor = env.scene.sensors.get(sensor_cfg.name)
    if sensor is None:
        return torch.zeros(env.num_envs, device=env.device)

    # Contact forces have shape (num_envs, num_bodies_in_sensor, 3); take the
    # Z component for the monitored bodies (left/right feet). The robot is
    # "fully airborne" only when all foot forces are under the threshold.
    vertical_force = sensor.data.net_forces_w[:, :, 2]
    fully_airborne = torch.all(vertical_force < threshold, dim=-1)
    return fully_airborne.float()
|
|
|
|
|
|
|
|
|
|
|
|
def root_vel_z_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Squared penalty on upward root velocity (punishes launching upward)."""
    # Only positive (upward) Z velocity is penalised; downward motion is free.
    upward = torch.clamp(env.scene[asset_cfg.name].data.root_lin_vel_w[:, 2], min=0.0)
    return upward * upward
|
|
|
|
|
|
|
2026-03-20 03:37:56 -04:00
|
|
|
|
def joint_pos_rel_l2_local(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
    """Sum of squared joint deviations (L2) from the default pose, per env."""
    # Per-joint offset from the default position, shape (num_envs, num_joints).
    deviation = mdp.joint_pos_rel(env, asset_cfg)
    # Reduce to one scalar per environment.
    return (deviation ** 2).sum(dim=-1)
|
|
|
|
|
|
|
|
|
|
|
|
def strict_feet_contact_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
    """Large stick: flags any environment whose feet are not firmly planted.

    Returns 1.0 for a violation (at least one foot unloaded), 0.0 when both
    feet carry load — forcing the policy to keep a support point.
    """
    sensor = env.scene.sensors.get(sensor_cfg.name)
    # Every monitored foot must push with at least 5 N of vertical force; the
    # weakest foot decides, so a single unloaded foot counts as unstable.
    weakest_force = torch.min(sensor.data.net_forces_w[:, :, 2], dim=-1)[0]
    both_feet_loaded = weakest_force > 5.0
    return (~both_feet_loaded).float()
|
2026-03-19 09:08:57 -04:00
|
|
|
|
|
|
|
|
|
|
# --- 2. Configuration classes ---
|
2026-03-16 09:23:22 -04:00
|
|
|
|
|
2026-03-17 05:54:20 -04:00
|
|
|
|
# Joints actuated in this task: hip pitch/roll/yaw, knee pitch, ankle
# pitch/roll for both legs. NOTE(review): only leg joints are listed, so the
# arms are unactuated — confirm that is intended given the arm-push reward.
T1_JOINT_NAMES = [
    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
]
|
|
|
|
|
|
|
2026-03-19 06:29:30 -04:00
|
|
|
|
|
2026-03-16 05:00:20 -04:00
|
|
|
|
@configclass
class T1ObservationCfg:
    """Observation configuration: a single 'policy' group of concatenated terms."""

    @configclass
    class PolicyCfg(ObsGroup):
        # All terms below are concatenated into one flat observation vector.
        concatenate_terms = True
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        # NOTE(review): root_pos_w is a world-frame absolute position; with
        # many spaced-out envs this value differs per env origin — confirm an
        # env-relative position was not intended instead.
        root_pos = ObsTerm(func=mdp.root_pos_w)
        # Joint positions/velocities relative to defaults, restricted to the
        # actuated leg joints.
        joint_pos = ObsTerm(func=mdp.joint_pos_rel,
                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
        joint_vel = ObsTerm(func=mdp.joint_vel_rel,
                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
        # Previous action, for policy smoothness.
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@configclass
class T1EventCfg:
    """Reset events: drop the robot near the ground in a randomized lying pose."""

    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        params={
            "asset_cfg": SceneEntityCfg("robot"),
            "pose_range": {
                "roll": (-1.57, 1.57),  # lying on either side
                # Supine vs. prone: scale the pitch band by a random sign, then
                # sort so the sampling range is always (low, high). Fix: the
                # previous code could hand reset_root_state_uniform a reversed
                # range like (-1.4, -1.6) when the negative sign was drawn.
                # NOTE(review): the sign is drawn ONCE at class-definition
                # time, not per reset — all resets in a run share the same
                # supine/prone choice. Confirm whether per-reset randomization
                # was intended.
                "pitch": tuple(numpy.sort(numpy.array([1.4, 1.6], dtype=np.float32) * random.choice([-1, 1]))),
                "yaw": (-3.14, 3.14),  # full heading randomization
                "x": (0.0, 0.0),
                "y": (0.0, 0.0),
                "z": (0.1, 0.2),  # small drop height above the ground
            },
            "velocity_range": {},
        },
        mode="reset",
    )
|
|
|
|
|
|
|
2026-03-16 09:23:22 -04:00
|
|
|
|
|
2026-03-16 05:46:49 -04:00
|
|
|
|
@configclass
class T1ActionCfg:
    """Key tweak: lowered action scale for smoother motion (stronger damping feel)."""
    # Joint-position targets for the actuated leg joints; targets are offsets
    # from the default joint positions (use_default_offset=True), scaled by 0.5.
    joint_pos = JointPositionActionCfg(
        asset_name="robot",
        joint_names=T1_JOINT_NAMES,
        scale=0.5,
        use_default_offset=True
    )
|
2026-03-16 05:00:20 -04:00
|
|
|
|
|
2026-03-16 09:23:22 -04:00
|
|
|
|
|
2026-03-16 05:00:20 -04:00
|
|
|
|
@configclass
class T1GetUpRewardCfg:
    """Reward terms for the T1 get-up task."""

    # 1. Base posture term (guides the body toward upright).
    # NOTE(review): mdp.flat_orientation_l2 grows with tilt and is
    # conventionally used with a NEGATIVE weight; a positive weight rewards
    # tilting. Confirm the sign is intentional.
    upright = RewTerm(func=mdp.flat_orientation_l2, weight=10.0)

    # 2. Conditional height reward: both head and pelvis must rise, and in the
    # upper phase the feet must be on the ground.
    height_with_feet = RewTerm(
        func=standing_with_feet_reward,
        weight=50.0,  # core guidance term, hence the large weight
        params={
            "min_head_height": 1.10,
            "min_pelvis_height": 0.65,
            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
            "force_threshold": 10.0
        }
    )

    # 3. Arm push-up reward: assists the ground-departure phase.
    arm_push_support = RewTerm(
        func=arm_push_up_reward,
        weight=20.0,  # weight raised substantially so this drives the initial push-up
        params={
            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_hand_link", "AL3", "AR3"]),
            "height_threshold": 0.6,  # encourage arm effort until the trunk reaches 0.6 m
            "min_force": 10.0  # triggers once at least 10 N of arm force is present
        }
    )

    # 4. Penalty: head/trunk touching the ground.
    undesired_contacts = RewTerm(
        func=mdp.undesired_contacts,
        weight=-2.0,
        params={
            "sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["H2", "Trunk"]),
            # Note: arm links must stay excluded here, otherwise pushing with
            # the arms would be penalised at the same time.
            "threshold": 1.0
        }
    )

    # 5. Jump suppression: harshly penalise upward root velocity.
    root_vel_z_penalty = RewTerm(
        func=root_vel_z_l2_local,
        weight=-1.0,  # enlarged negative weight
        params={"asset_cfg": SceneEntityCfg("robot")}
    )

    # 6. Airtime suppression: a jump must cost more than standing up earns.
    feet_airtime = RewTerm(
        func=strict_feet_contact_reward,
        weight=-5.0,  # heavy weight so one hop outweighs the standing gain
        params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"])}
    )

    # Penalise overly fast joint motion.
    joint_vel_penalty = RewTerm(
        func=mdp.joint_vel_l2,
        weight=-0.01,
        params={"asset_cfg": SceneEntityCfg("robot")}
    )

    # Penalise abrupt action changes for smoother, less explosive motion.
    action_rate = RewTerm(
        func=mdp.action_rate_l2,
        weight=-0.05,
    )

    # Penalise trunk roll/pitch angular velocity (anti-flailing).
    base_ang_vel_penalty = RewTerm(
        func=lambda env, asset_cfg: torch.norm(mdp.base_ang_vel(env, asset_cfg), dim=-1),
        weight=-0.1,
        params={"asset_cfg": SceneEntityCfg("robot")}
    )

    # Positive weight on joint deviation: deliberately encourages movement
    # away from the default pose; keep the weight small.
    joint_deviation = RewTerm(
        func=joint_pos_rel_l2_local,
        weight=0.1,
        params={"asset_cfg": SceneEntityCfg("robot")}
    )

    # 7. Terminal success jackpot: pays out when the standing_success
    # termination term fires.
    is_success = RewTerm(
        func=lambda env, keys: env.termination_manager.get_term(keys),
        weight=1000.0,
        params={"keys": "standing_success"}
    )
|
2026-03-16 05:00:20 -04:00
|
|
|
|
|
2026-03-19 09:08:57 -04:00
|
|
|
|
|
2026-03-16 05:00:20 -04:00
|
|
|
|
@configclass
class T1GetUpTerminationsCfg:
    """Termination conditions: episode timeout and the standing-success check."""
    time_out = DoneTerm(func=mdp.time_out)

    # Failure check (disabled): reset when the trunk tilts beyond 45 degrees.
    #base_crash = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 0.785})

    # Success check: both heights reached + stable (upright and still) for a
    # sustained period. Thresholds are slightly different from the reward
    # term's (head 1.05 vs 1.10, pelvis 0.75 vs 0.65) — presumably deliberate.
    standing_success = DoneTerm(
        func=is_standing_still,
        params={
            "min_head_height": 1.05,
            "min_pelvis_height": 0.75,
            "max_angle_error": 0.15,
            "standing_time": 0.8,
            "velocity_threshold": 0.15
        }
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Top-level env config wiring scene, MDP managers and sim timing together."""
    # 16384 parallel environments — sized for a high-end GPU.
    scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)

    def __post_init__(self):
        """Post-init hook: spawn the robot slightly above the ground plane."""
        super().__post_init__()
        self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)

    observations = T1ObservationCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1GetUpTerminationsCfg()
    events = T1EventCfg()
    actions = T1ActionCfg()

    # 6-second episodes; control runs every 4 physics steps.
    episode_length_s = 6.0
    decimation = 4
|