rl_game/get_up/config/t1_env_cfg.py

import torch
import random
import numpy as np
import isaaclab.envs.mdp as mdp
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg


# --- 1. 自定义逻辑：阶段性解锁奖励 ---

def sequenced_getup_reward(
        env: ManagerBasedRLEnv,
        crouch_threshold: float = 0.7,  # 蜷缩完成度达到多少解锁下一阶段
        target_knee: float = 1.5,
        target_hip: float = 1.2
) -> torch.Tensor:
    """
    【核心修改】只有先蜷缩，才能拿高度分：
    1. 计算蜷缩程度。
    2. 记录当前 Episode 是否曾经达到过蜷缩目标。
    3. 返回 基础蜷缩奖 + (解锁标志 * 站立奖)。
    """
    # --- 1. 初始化/重置状态位 ---
    if "has_crouched" not in env.extras:
        env.extras["has_crouched"] = torch.zeros(env.num_envs, device=env.device, dtype=torch.bool)

    # 每一回合开始时（reset_buf 为 1），重置该机器人的状态位
    env.extras["has_crouched"] &= ~env.reset_buf

    # --- 2. 计算当前蜷缩质量 ---
    knee_names = ['Left_Knee_Pitch', 'Right_Knee_Pitch']
    hip_names = ['Left_Hip_Pitch', 'Right_Hip_Pitch']
    knee_indices, _ = env.scene["robot"].find_joints(knee_names)
    hip_indices, _ = env.scene["robot"].find_joints(hip_names)
    joint_pos = env.scene["robot"].data.joint_pos

    knee_error = torch.mean(torch.abs(joint_pos[:, knee_indices] - target_knee), dim=-1)
    hip_error = torch.mean(torch.abs(joint_pos[:, hip_indices] - target_hip), dim=-1)

    # 蜷缩得分 (0.0 ~ 1.0)
    crouch_score = torch.exp(-(knee_error + hip_error) / 0.6)

    # --- 3. 判断是否触发解锁 ---
    # 只要在这一回合内，crouch_score 曾经超过阈值，就永久解锁高度奖
    current_success = crouch_score > crouch_threshold
    env.extras["has_crouched"] |= current_success

    # --- 4. 计算高度奖励 ---
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
    curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
    # 只有解锁后，高度奖励才生效 (0.0 或 高度值)
    standing_reward = torch.clamp(curr_pelvis_h - 0.3, min=0.0) * 20.0
    gated_standing_reward = env.extras["has_crouched"].float() * standing_reward

    # 总奖励 = 持续引导蜷缩 + 只有解锁后才有的站立奖
    return 5.0 * crouch_score + gated_standing_reward


def is_standing_still(
        env: ManagerBasedRLEnv,
        min_head_height: float,
        min_pelvis_height: float,
        max_angle_error: float,
        standing_time: float,
        velocity_threshold: float = 0.15
) -> torch.Tensor:
    head_idx, _ = env.scene["robot"].find_bodies("H2")
    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")

    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]

    gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
    root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)

    is_stable_now = (
            (current_head_h > min_head_height) &
            (current_pelvis_h > min_pelvis_height) &
            (gravity_error < max_angle_error) &
            (root_vel_norm < velocity_threshold)
    )

    if "stable_timer" not in env.extras:
        env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device)

    dt = env.physics_dt * env.cfg.decimation
    env.extras["stable_timer"] = torch.where(is_stable_now, env.extras["stable_timer"] + dt,
                                             torch.zeros_like(env.extras["stable_timer"]))

    return env.extras["stable_timer"] > standing_time


# --- 2. 配置类 ---

T1_JOINT_NAMES = [
    'AAHead_yaw', 'Head_pitch',
    'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
    'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
    'Waist',
    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
]


@configclass
class T1ObservationCfg:
    @configclass
    class PolicyCfg(ObsGroup):
        concatenate_terms = True
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        root_pos = ObsTerm(func=mdp.root_pos_w)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel,
                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
        joint_vel = ObsTerm(func=mdp.joint_vel_rel,
                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()


@configclass
class T1EventCfg:
    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        params={
            "asset_cfg": SceneEntityCfg("robot"),
            "pose_range": {
                "roll": (-1.57, 1.57),
                "pitch": tuple(np.array([1.4, 1.6], dtype=np.float32) * random.choice([-1 , 1])),
                "yaw": (-3.14, 3.14),
                "x": (0.0, 0.0),
                "y": (0.0, 0.0),
                "z": (0.35, 0.45),
            },
            "velocity_range": {},
        },
        mode="reset",
    )


@configclass
class T1ActionCfg:
    # 拆分动作组以防止抽搐。由于不强制规定动作，我们可以给各个部位较为均衡的探索范围。
    arm_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=[
            'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
            'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw'
        ],
        scale=1.0,  # 给了手臂相对充裕的自由度去摸索
        use_default_offset=True
    )

    torso_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=['Waist', 'AAHead_yaw', 'Head_pitch'],
        scale=0.7,
        use_default_offset=True
    )

    leg_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=[
            'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
            'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
            'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
        ],
        scale=0.5,
        use_default_offset=True
    )


@configclass
class T1GetUpRewardCfg:
    # 核心：顺序阶段奖励
    sequenced_task = RewTerm(
        func=sequenced_getup_reward,
        weight=10.0,
        params={"crouch_threshold": 0.75}  # 必须完成 75% 的收腿动作才解锁高度奖
    )

    # 姿态惩罚：即便解锁了高度奖，如果姿态歪了也要扣分
    orientation = RewTerm(
        func=mdp.flat_orientation_l2,
        weight=-2.5
    )

    # 抑制抽搐
    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.08)

    # 最终站稳奖
    is_success_maintain = RewTerm(
        func=is_standing_still,
        weight=100.0,
        params={
            "min_head_height": 1.08,
            "min_pelvis_height": 0.72,
            "max_angle_error": 0.25,
            "standing_time": 0.4,
            "velocity_threshold": 0.2
        }
    )


@configclass
class T1GetUpTerminationsCfg:
    time_out = DoneTerm(func=mdp.time_out)
    standing_success = DoneTerm(
        func=is_standing_still,
        params={
            "min_head_height": 1.08,
            "min_pelvis_height": 0.72,
            "max_angle_error": 0.3,
            "standing_time": 0.3,
            "velocity_threshold": 0.4
        }
    )


@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    scene = T1SceneCfg(num_envs=8192, env_spacing=2.5)
    observations = T1ObservationCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1GetUpTerminationsCfg()
    events = T1EventCfg()
    actions = T1ActionCfg()
    episode_length_s = 10.0
    decimation = 4
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								import torch
-												Amend bugs

											
										
										
											2026-03-20 07:03:41 -04:00
+								import random
-												Amend tiny bug

											
										
										
											2026-03-20 08:12:08 -04:00
+								import numpy as np
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								import isaaclab.envs.mdp as mdp
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								from isaaclab.assets import ArticulationCfg
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								from isaaclab.managers import ObservationGroupCfg as ObsGroup
 								from isaaclab.managers import ObservationTermCfg as ObsTerm
 								from isaaclab.managers import RewardTermCfg as RewTerm
 								from isaaclab.managers import TerminationTermCfg as DoneTerm
 								from isaaclab.managers import EventTermCfg as EventTerm
 								from isaaclab.envs.mdp import JointPositionActionCfg
 								from isaaclab.managers import SceneEntityCfg
 								from isaaclab.utils import configclass
 								from rl_game.get_up.env.t1_env import T1SceneCfg
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								# --- 1. 自定义逻辑：阶段性解锁奖励 ---
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								def sequenced_getup_reward(
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								        env: ManagerBasedRLEnv,
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								        crouch_threshold: float = 0.7,  # 蜷缩完成度达到多少解锁下一阶段
 								        target_knee: float = 1.5,
 								        target_hip: float = 1.2
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								) -> torch.Tensor:
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    """
 								    【核心修改】只有先蜷缩，才能拿高度分：
 . 计算蜷缩程度。
 . 记录当前 Episode 是否曾经达到过蜷缩目标。
 . 返回 基础蜷缩奖 + (解锁标志 * 站立奖)。
 								    """
 								    # --- 1. 初始化/重置状态位 ---
 								    if "has_crouched" not in env.extras:
 								        env.extras["has_crouched"] = torch.zeros(env.num_envs, device=env.device, dtype=torch.bool)
 								    # 每一回合开始时（reset_buf 为 1），重置该机器人的状态位
 								    env.extras["has_crouched"] &= ~env.reset_buf
 								    # --- 2. 计算当前蜷缩质量 ---
 								    knee_names = ['Left_Knee_Pitch', 'Right_Knee_Pitch']
 								    hip_names = ['Left_Hip_Pitch', 'Right_Hip_Pitch']
 								    knee_indices, _ = env.scene["robot"].find_joints(knee_names)
 								    hip_indices, _ = env.scene["robot"].find_joints(hip_names)
 								    joint_pos = env.scene["robot"].data.joint_pos
-												Amend for standing

											
										
										
											2026-03-20 03:37:56 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    knee_error = torch.mean(torch.abs(joint_pos[:, knee_indices] - target_knee), dim=-1)
 								    hip_error = torch.mean(torch.abs(joint_pos[:, hip_indices] - target_hip), dim=-1)
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 蜷缩得分 (0.0 ~ 1.0)
 								    crouch_score = torch.exp(-(knee_error + hip_error) / 0.6)
-												change T1EventCfg to add more initial state

											
										
										
											2026-03-20 05:20:17 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # --- 3. 判断是否触发解锁 ---
 								    # 只要在这一回合内，crouch_score 曾经超过阈值，就永久解锁高度奖
 								    current_success = crouch_score > crouch_threshold
 								    env.extras["has_crouched"] |= current_success
-												change arm to push the ground reward function

											
										
										
											2026-03-21 08:38:17 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # --- 4. 计算高度奖励 ---
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
 								    curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 只有解锁后，高度奖励才生效 (0.0 或 高度值)
 								    standing_reward = torch.clamp(curr_pelvis_h - 0.3, min=0.0) * 20.0
 								    gated_standing_reward = env.extras["has_crouched"].float() * standing_reward
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 总奖励 = 持续引导蜷缩 + 只有解锁后才有的站立奖
 								    return 5.0 * crouch_score + gated_standing_reward
-												change T1EventCfg to add more initial state

											
										
										
											2026-03-20 05:20:17 -04:00
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								def is_standing_still(
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
+								        env: ManagerBasedRLEnv,
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								        min_head_height: float,
 								        min_pelvis_height: float,
 								        max_angle_error: float,
 								        standing_time: float,
 								        velocity_threshold: float = 0.15
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
+								) -> torch.Tensor:
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								    head_idx, _ = env.scene["robot"].find_bodies("H2")
 								    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
 								    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								    gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
 								    root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								    is_stable_now = (
 								            (current_head_h > min_head_height) &
 								            (current_pelvis_h > min_pelvis_height) &
 								            (gravity_error < max_angle_error) &
 								            (root_vel_norm < velocity_threshold)
 								    )
-												Amend some codes to init training for get up better

											
										
										
											2026-03-18 06:05:30 -04:00
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								    if "stable_timer" not in env.extras:
 								        env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device)
 								    dt = env.physics_dt * env.cfg.decimation
 								    env.extras["stable_timer"] = torch.where(is_stable_now, env.extras["stable_timer"] + dt,
 								                                             torch.zeros_like(env.extras["stable_timer"]))
 								    return env.extras["stable_timer"] > standing_time
-												Add weighting function, change the reward logic

											
										
										
											2026-03-22 21:11:46 -04:00
-												Add arm link rewards

											
										
										
											2026-03-19 09:08:57 -04:00
+								# --- 2. 配置类 ---
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
-												change reward add punishment of joint_vel and root_vel_z_penalty

											
										
										
											2026-03-17 05:54:20 -04:00
+								T1_JOINT_NAMES = [
-												Amend symbol to save video memory

											
										
										
											2026-03-22 03:05:24 -04:00
+								    'AAHead_yaw', 'Head_pitch',
-												Amend bugs

											
										
										
											2026-03-22 00:01:21 -04:00
+								    'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
 								    'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
-												Amend symbol

											
										
										
											2026-03-22 02:57:04 -04:00
+								    'Waist',
-												prevent gradient explosion

											
										
										
											2026-03-22 02:55:07 -04:00
+								    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
 								    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
-												Amend symbol

											
										
										
											2026-03-22 02:57:04 -04:00
+								    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
-												change reward add punishment of joint_vel and root_vel_z_penalty

											
										
										
											2026-03-17 05:54:20 -04:00
+								]
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								@configclass
 								class T1ObservationCfg:
 								    @configclass
 								    class PolicyCfg(ObsGroup):
 								        concatenate_terms = True
 								        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
 								        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
 								        projected_gravity = ObsTerm(func=mdp.projected_gravity)
-												change parameter

											
										
										
											2026-03-20 08:55:29 -04:00
+								        root_pos = ObsTerm(func=mdp.root_pos_w)
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
+								        joint_pos = ObsTerm(func=mdp.joint_pos_rel,
 								                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
 								        joint_vel = ObsTerm(func=mdp.joint_vel_rel,
 								                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								        actions = ObsTerm(func=mdp.last_action)
 								    policy = PolicyCfg()
 								@configclass
 								class T1EventCfg:
 								    reset_robot_rotation = EventTerm(
-												Amend bugs

											
										
										
											2026-03-20 07:03:41 -04:00
+								        func=mdp.reset_root_state_uniform,
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								        params={
 								            "asset_cfg": SceneEntityCfg("robot"),
-												Amend some bugs and make it training

											
										
										
											2026-03-16 05:46:49 -04:00
+								            "pose_range": {
-												Add weighting function, change the reward logic

											
										
										
											2026-03-22 21:11:46 -04:00
+								                "roll": (-1.57, 1.57),
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								                "pitch": tuple(np.array([1.4, 1.6], dtype=np.float32) * random.choice([-1 , 1])),
-												Add weighting function, change the reward logic

											
										
										
											2026-03-22 21:11:46 -04:00
+								                "yaw": (-3.14, 3.14),
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								                "x": (0.0, 0.0),
-												Amend some bugs and make it training

											
										
										
											2026-03-16 05:46:49 -04:00
+								                "y": (0.0, 0.0),
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								                "z": (0.35, 0.45),
-												Amend some bugs and make it training

											
										
										
											2026-03-16 05:46:49 -04:00
+								            },
-												Amend tiny bug

											
										
										
											2026-03-22 21:21:17 -04:00
+								            "velocity_range": {},
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								        },
 								        mode="reset",
 								    )
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
-												Amend some bugs and make it training

											
										
										
											2026-03-16 05:46:49 -04:00
+								@configclass
 								class T1ActionCfg:
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								    # 拆分动作组以防止抽搐。由于不强制规定动作，我们可以给各个部位较为均衡的探索范围。
 								    arm_action = JointPositionActionCfg(
 								        asset_name="robot",
 								        joint_names=[
 								            'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
 								            'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw'
 								        ],
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								        scale=1.0,  # 给了手臂相对充裕的自由度去摸索
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								        use_default_offset=True
 								    )
 								    torso_action = JointPositionActionCfg(
 								        asset_name="robot",
 								        joint_names=['Waist', 'AAHead_yaw', 'Head_pitch'],
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								        scale=0.7,
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								        use_default_offset=True
 								    )
 								    leg_action = JointPositionActionCfg(
 								        asset_name="robot",
 								        joint_names=[
 								            'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
 								            'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
 								            'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
 								        ],
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								        scale=0.5,
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								        use_default_offset=True
-												Amend some bugs and make it training

											
										
										
											2026-03-16 05:46:49 -04:00
+								    )
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								@configclass
 								class T1GetUpRewardCfg:
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 核心：顺序阶段奖励
 								    sequenced_task = RewTerm(
 								        func=sequenced_getup_reward,
 								        weight=10.0,
 								        params={"crouch_threshold": 0.75}  # 必须完成 75% 的收腿动作才解锁高度奖
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								    )
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 姿态惩罚：即便解锁了高度奖，如果姿态歪了也要扣分
 								    orientation = RewTerm(
 								        func=mdp.flat_orientation_l2,
 								        weight=-2.5
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								    )
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 抑制抽搐
 								    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.08)
-												change reward add punishment of joint_vel and root_vel_z_penalty

											
										
										
											2026-03-17 05:54:20 -04:00
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								    # 最终站稳奖
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								    is_success_maintain = RewTerm(
-												Amend success rewards

											
										
										
											2026-03-22 02:32:58 -04:00
+								        func=is_standing_still,
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								        weight=100.0,
-												Amend success rewards

											
										
										
											2026-03-22 02:32:58 -04:00
+								        params={
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								            "min_head_height": 1.08,
 								            "min_pelvis_height": 0.72,
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								            "max_angle_error": 0.25,
 								            "standing_time": 0.4,
 								            "velocity_threshold": 0.2
-												Amend success rewards

											
										
										
											2026-03-22 02:32:58 -04:00
+								        }
-												change reward add punishment of joint_vel and root_vel_z_penalty

											
										
										
											2026-03-17 05:54:20 -04:00
+								    )
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								@configclass
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								class T1GetUpTerminationsCfg:
-												Amend some codes to init training for get up better

											
										
										
											2026-03-18 06:05:30 -04:00
+								    time_out = DoneTerm(func=mdp.time_out)
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								    standing_success = DoneTerm(
-												change rewards

											
										
										
											2026-03-19 06:29:30 -04:00
+								        func=is_standing_still,
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								        params={
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								            "min_head_height": 1.08,
 								            "min_pelvis_height": 0.72,
-												change arm to push the ground reward function

											
										
										
											2026-03-21 08:38:17 -04:00
+								            "max_angle_error": 0.3,
-												reward modification and add stage reward

											
										
										
											2026-03-23 10:17:31 -04:00
+								            "standing_time": 0.3,
 								            "velocity_threshold": 0.4
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								        }
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								    )
 								@configclass
 								class T1EnvCfg(ManagerBasedRLEnvCfg):
-												Add weighting function, change the reward logic

											
										
										
											2026-03-22 21:11:46 -04:00
+								    scene = T1SceneCfg(num_envs=8192, env_spacing=2.5)
-												The demo of get up

											
										
										
											2026-03-16 05:00:20 -04:00
+								    observations = T1ObservationCfg()
 								    rewards = T1GetUpRewardCfg()
-												Add reward to maintain an upright and stable position

											
										
										
											2026-03-16 09:23:22 -04:00
+								    terminations = T1GetUpTerminationsCfg()
 								    events = T1EventCfg()
-												Amend some bugs and make it training

											
										
										
											2026-03-16 05:46:49 -04:00
+								    actions = T1ActionCfg()
-												reward modification and change the get_up logic

											
										
										
											2026-03-23 09:06:36 -04:00
+								    episode_length_s = 10.0
-												change reward function

											
										
										
											2026-03-21 07:00:49 -04:00
+								    decimation = 4