Add code to support retraining (resuming training from a saved checkpoint)

2026-03-17 05:56:26 -04:00
8 changed files with 42 additions and 513 deletions
--- a/rl_game/demo/train.py
+++ b/rl_game/demo/train.py
@@ -1,83 +1,101 @@
 import sys
 import os
-# 关键：确保当前目录在 sys.path 中，这样才能直接 from config 导入
+import argparse
+
+# 确保能找到项目根目录下的模块
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))

-import argparse
 from isaaclab.app import AppLauncher

-# 添加启动参数
-parser = argparse.ArgumentParser(description="Train T1 robot with rl_games.")
-parser.add_argument("--num_envs", type=int, default=16384, help="Number of envs to run.")
+# 1. 配置启动参数
+parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.")
+parser.add_argument("--num_envs", type=int, default=16384, help="起身任务建议并行 4096 即可")
+parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID")
+parser.add_argument("--seed", type=int, default=42, help="随机种子")
 AppLauncher.add_app_launcher_args(parser)
 args_cli = parser.parse_args()

-# 启动仿真器
+# 2. 启动仿真器（必须在导入其他 isaaclab 模块前）
 app_launcher = AppLauncher(args_cli)
 simulation_app = app_launcher.app

 import torch
 import gymnasium as gym
+import yaml
 from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
 from rl_games.torch_runner import Runner
-import yaml
-from config.t1_env_cfg import T1EnvCfg
 from rl_games.common import env_configurations, vecenv

+# 导入你刚刚修改好的配置类
+# The config class T1EnvCfg is imported from config/t1_env_cfg.py
+from config.t1_env_cfg import T1EnvCfg
+
+# 3. 注册环境
 gym.register(
-    id="Isaac-T1-Walking-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv", # Isaac Lab 统一的强化学习环境入口
+    id="Isaac-T1-Walk-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    kwargs={
-        "cfg": T1EnvCfg(),
+        "cfg": T1EnvCfg(),  # 这里会加载你设置的随机旋转、时间惩罚等
    },
 )

-def main():
-    # 1. 创建环境 (保持不变)
-    env = gym.make("Isaac-T1-Walking-v0", num_envs=args_cli.num_envs)

-    # 2. 包装环境 (保持不变)
+def main():
+    # --- 新增：处理 Retrain 参数 ---
+    # 你可以手动指定路径，或者在 argparse 里增加一个 --checkpoint 参数
+    checkpoint_path = os.path.join(os.path.dirname(__file__), "logs/T1_GetUp/nn/**.pth")
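+    # NOTE(review): os.path.exists() does not expand the "**" wildcard in the path above, so this is only True if a file is literally named "**.pth" - confirm the intended checkpoint file name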
+    should_retrain = os.path.exists(checkpoint_path)
+
+    env = gym.make("Isaac-T1-Walk-v0", num_envs=args_cli.num_envs)
+
+    # 注意：rl_device 必须设置为 args_cli.device (通常是 'cuda:0')
    wrapped_env = RlGamesVecEnvWrapper(
        env,
        rl_device=args_cli.device,
        clip_obs=5.0,
-        clip_actions=100.0
+        clip_actions=1.0
    )

    vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)

-    # 注册环境配置
    env_configurations.register('rlgym', {
        'vecenv_type': 'as_is',
        'env_creator': lambda **kwargs: wrapped_env
    })

-    # 3. 加载 PPO 配置 (保持不变)
    config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
    with open(config_path, "r") as f:
        rl_config = yaml.safe_load(f)

-    # 设置日志路径
+    # 设置日志和实验名称
    rl_game_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
    log_dir = os.path.join(rl_game_dir, "logs")
    rl_config['params']['config']['train_dir'] = log_dir
+    rl_config['params']['config']['name'] = "T1_GetUp"

-    # 4. 启动训练
+    # --- 关键修改：注入模型路径 ---
+    if should_retrain:
+        print(f"[INFO]: 检测到预训练模型，正在从 {checkpoint_path} 恢复训练...")
+        # rl_games 会读取 config 中的 load_path 进行续训
+        rl_config['params']['config']['load_path'] = checkpoint_path
+    else:
+        print("[INFO]: 未找到预训练模型，将从零开始训练。")
+
+    # 4. Run training
    runner = Runner()
-
-    # 此时 rl_config 只有文本和数字，没有复杂对象，deepcopy 会成功
    runner.load(rl_config)

-    # 在 run 时传入对象是安全的
    runner.run({
        "train": True,
        "play": False,
+        # 如果你想强制从某个 checkpoint 开始，也可以在这里传参
+        "checkpoint": checkpoint_path if should_retrain else None,
        "vec_env": wrapped_env
    })

    simulation_app.close()

-# PYTHONPATH=. python rl_game/your_file_name/train.py

 if __name__ == "__main__":
    main()
--- a/rl_game/get_up/init.py
+++ b/rl_game/get_up/init.py
@@ -1,13 +0,0 @@
-import gymnasium as gym
-
-# 导入你的配置
-from rl_game.demo.config.t1_env_cfg import T1EnvCfg
-
-# 注册环境到 Gymnasium
-gym.register(
-    id="Isaac-T1-GetUp-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv", # Isaac Lab 统一的强化学习环境入口
-    kwargs={
-        "cfg": T1EnvCfg(),
-    },
-)
--- a/rl_game/get_up/asset/t1/T1_locomotion_base.usd
+++ b/rl_game/get_up/asset/t1/T1_locomotion_base.usd
--- a/rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd
+++ b/rl_game/get_up/asset/t1/T1_locomotion_physics_lab.usd
--- a/rl_game/get_up/config/ppo_cfg.yaml
+++ b/rl_game/get_up/config/ppo_cfg.yaml
@@ -1,60 +0,0 @@
-params:
-  seed: 42
-  algo:
-    name: a2c_continuous
-
-  model:
-    name: continuous_a2c_logstd
-
-  network:
-    name: actor_critic
-    separate: False
-    space:
-      continuous:
-        mu_activation: None
-        sigma_activation: None
-        mu_init:
-          name: default
-        sigma_init:
-          name: const_initializer
-          val: 0.5
-        fixed_sigma: False
-    mlp:
-      units: [512, 256, 128]
-      activation: relu
-      d2rl: False
-      initializer:
-        name: default
-
-  config:
-    name: T1_Walking
-    env_name: rlgym # Isaac Lab 包装器
-    multi_gpu: False
-    ppo: True
-    mixed_precision: True
-    normalize_input: True
-    normalize_value: True
-    value_bootstrap: True
-    num_actors: 8192 # 同时训练的机器人数量
-    reward_shaper:
-      scale_value: 1.0
-    normalize_advantage: True
-    gamma: 0.98
-    tau: 0.95
-    learning_rate: 3e-4
-    lr_schedule: adaptive
-    kl_threshold: 0.015
-    score_to_win: 20000
-    max_epochs: 500
-    save_best_after: 50
-    save_frequency: 100
-    grad_norm: 1.0
-    entropy_coef: 0.005
-    truncate_grads: True
-    bounds_loss_coef: 0.001
-    e_clip: 0.2
-    horizon_length: 256
-    minibatch_size: 65536
-    mini_epochs: 4
-    critic_coef: 1
-    clip_value: True
--- a/rl_game/get_up/config/t1_env_cfg.py
+++ b/rl_game/get_up/config/t1_env_cfg.py
@@ -1,241 +0,0 @@
-import torch
-import random
-import numpy as np
-import isaaclab.envs.mdp as mdp
-from isaaclab.assets import ArticulationCfg
-from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
-from isaaclab.managers import ObservationGroupCfg as ObsGroup
-from isaaclab.managers import ObservationTermCfg as ObsTerm
-from isaaclab.managers import RewardTermCfg as RewTerm
-from isaaclab.managers import TerminationTermCfg as DoneTerm
-from isaaclab.managers import EventTermCfg as EventTerm
-from isaaclab.envs.mdp import JointPositionActionCfg
-from isaaclab.managers import SceneEntityCfg
-from isaaclab.utils import configclass
-from rl_game.get_up.env.t1_env import T1SceneCfg
-
-
-# --- 1. 自定义逻辑：阶段性解锁奖励 ---
-
-def sequenced_getup_reward(
-        env: ManagerBasedRLEnv,
-        crouch_threshold: float = 0.7,  # 蜷缩完成度达到多少解锁下一阶段
-        target_knee: float = 1.5,
-        target_hip: float = 1.2
-) -> torch.Tensor:
-    """
-    【核心修改】只有先蜷缩，才能拿高度分：
-    1. 计算蜷缩程度。
-    2. 记录当前 Episode 是否曾经达到过蜷缩目标。
-    3. 返回 基础蜷缩奖 + (解锁标志 * 站立奖)。
-    """
-    # --- 1. 初始化/重置状态位 ---
-    if "has_crouched" not in env.extras:
-        env.extras["has_crouched"] = torch.zeros(env.num_envs, device=env.device, dtype=torch.bool)
-
-    # 每一回合开始时（reset_buf 为 1），重置该机器人的状态位
-    env.extras["has_crouched"] &= ~env.reset_buf
-
-    # --- 2. 计算当前蜷缩质量 ---
-    knee_names = ['Left_Knee_Pitch', 'Right_Knee_Pitch']
-    hip_names = ['Left_Hip_Pitch', 'Right_Hip_Pitch']
-    knee_indices, _ = env.scene["robot"].find_joints(knee_names)
-    hip_indices, _ = env.scene["robot"].find_joints(hip_names)
-    joint_pos = env.scene["robot"].data.joint_pos
-
-    knee_error = torch.mean(torch.abs(joint_pos[:, knee_indices] - target_knee), dim=-1)
-    hip_error = torch.mean(torch.abs(joint_pos[:, hip_indices] - target_hip), dim=-1)
-
-    # 蜷缩得分 (0.0 ~ 1.0)
-    crouch_score = torch.exp(-(knee_error + hip_error) / 0.6)
-
-    # --- 3. 判断是否触发解锁 ---
-    # 只要在这一回合内，crouch_score 曾经超过阈值，就永久解锁高度奖
-    current_success = crouch_score > crouch_threshold
-    env.extras["has_crouched"] |= current_success
-
-    # --- 4. 计算高度奖励 ---
-    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
-    curr_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
-    # 只有解锁后，高度奖励才生效 (0.0 或 高度值)
-    standing_reward = torch.clamp(curr_pelvis_h - 0.3, min=0.0) * 20.0
-    gated_standing_reward = env.extras["has_crouched"].float() * standing_reward
-
-    # 总奖励 = 持续引导蜷缩 + 只有解锁后才有的站立奖
-    return 5.0 * crouch_score + gated_standing_reward
-
-
-def is_standing_still(
-        env: ManagerBasedRLEnv,
-        min_head_height: float,
-        min_pelvis_height: float,
-        max_angle_error: float,
-        standing_time: float,
-        velocity_threshold: float = 0.15
-) -> torch.Tensor:
-    head_idx, _ = env.scene["robot"].find_bodies("H2")
-    pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
-
-    current_head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
-    current_pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
-
-    gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
-    root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)
-
-    is_stable_now = (
-            (current_head_h > min_head_height) &
-            (current_pelvis_h > min_pelvis_height) &
-            (gravity_error < max_angle_error) &
-            (root_vel_norm < velocity_threshold)
-    )
-
-    if "stable_timer" not in env.extras:
-        env.extras["stable_timer"] = torch.zeros(env.num_envs, device=env.device)
-
-    dt = env.physics_dt * env.cfg.decimation
-    env.extras["stable_timer"] = torch.where(is_stable_now, env.extras["stable_timer"] + dt,
-                                             torch.zeros_like(env.extras["stable_timer"]))
-
-    return env.extras["stable_timer"] > standing_time
-
-
-# --- 2. 配置类 ---
-
-T1_JOINT_NAMES = [
-    'AAHead_yaw', 'Head_pitch',
-    'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
-    'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
-    'Waist',
-    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
-    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
-    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
-]
-
-
-@configclass
-class T1ObservationCfg:
-    @configclass
-    class PolicyCfg(ObsGroup):
-        concatenate_terms = True
-        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
-        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
-        projected_gravity = ObsTerm(func=mdp.projected_gravity)
-        root_pos = ObsTerm(func=mdp.root_pos_w)
-        joint_pos = ObsTerm(func=mdp.joint_pos_rel,
-                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
-        joint_vel = ObsTerm(func=mdp.joint_vel_rel,
-                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
-        actions = ObsTerm(func=mdp.last_action)
-
-    policy = PolicyCfg()
-
-
-@configclass
-class T1EventCfg:
-    reset_robot_rotation = EventTerm(
-        func=mdp.reset_root_state_uniform,
-        params={
-            "asset_cfg": SceneEntityCfg("robot"),
-            "pose_range": {
-                "roll": (-1.57, 1.57),
-                "pitch": tuple(np.array([1.4, 1.6], dtype=np.float32) * random.choice([-1 , 1])),
-                "yaw": (-3.14, 3.14),
-                "x": (0.0, 0.0),
-                "y": (0.0, 0.0),
-                "z": (0.35, 0.45),
-            },
-            "velocity_range": {},
-        },
-        mode="reset",
-    )
-
-
-@configclass
-class T1ActionCfg:
-    # 拆分动作组以防止抽搐。由于不强制规定动作，我们可以给各个部位较为均衡的探索范围。
-    arm_action = JointPositionActionCfg(
-        asset_name="robot",
-        joint_names=[
-            'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
-            'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw'
-        ],
-        scale=1.0,  # 给了手臂相对充裕的自由度去摸索
-        use_default_offset=True
-    )
-
-    torso_action = JointPositionActionCfg(
-        asset_name="robot",
-        joint_names=['Waist', 'AAHead_yaw', 'Head_pitch'],
-        scale=0.7,
-        use_default_offset=True
-    )
-
-    leg_action = JointPositionActionCfg(
-        asset_name="robot",
-        joint_names=[
-            'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
-            'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
-            'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
-        ],
-        scale=0.5,
-        use_default_offset=True
-    )
-
-
-@configclass
-class T1GetUpRewardCfg:
-    # 核心：顺序阶段奖励
-    sequenced_task = RewTerm(
-        func=sequenced_getup_reward,
-        weight=10.0,
-        params={"crouch_threshold": 0.75}  # 必须完成 75% 的收腿动作才解锁高度奖
-    )
-
-    # 姿态惩罚：即便解锁了高度奖，如果姿态歪了也要扣分
-    orientation = RewTerm(
-        func=mdp.flat_orientation_l2,
-        weight=-2.5
-    )
-
-    # 抑制抽搐
-    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.08)
-
-    # 最终站稳奖
-    is_success_maintain = RewTerm(
-        func=is_standing_still,
-        weight=100.0,
-        params={
-            "min_head_height": 1.08,
-            "min_pelvis_height": 0.72,
-            "max_angle_error": 0.25,
-            "standing_time": 0.4,
-            "velocity_threshold": 0.2
-        }
-    )
-
-
-@configclass
-class T1GetUpTerminationsCfg:
-    time_out = DoneTerm(func=mdp.time_out)
-    standing_success = DoneTerm(
-        func=is_standing_still,
-        params={
-            "min_head_height": 1.08,
-            "min_pelvis_height": 0.72,
-            "max_angle_error": 0.3,
-            "standing_time": 0.3,
-            "velocity_threshold": 0.4
-        }
-    )
-
-
-@configclass
-class T1EnvCfg(ManagerBasedRLEnvCfg):
-    scene = T1SceneCfg(num_envs=8192, env_spacing=2.5)
-    observations = T1ObservationCfg()
-    rewards = T1GetUpRewardCfg()
-    terminations = T1GetUpTerminationsCfg()
-    events = T1EventCfg()
-    actions = T1ActionCfg()
-    episode_length_s = 10.0
-    decimation = 4
--- a/rl_game/get_up/env/t1_env.py
+++ b/rl_game/get_up/env/t1_env.py
@@ -1,74 +0,0 @@
-from isaaclab.assets import ArticulationCfg, AssetBaseCfg
-from isaaclab.scene import InteractiveSceneCfg
-from isaaclab.sensors import ContactSensorCfg
-from isaaclab.utils import configclass
-from isaaclab.actuators import ImplicitActuatorCfg
-from isaaclab import sim as sim_utils
-
-import os
-
-_DEMO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "T1_locomotion_physics_lab.usd")
-
-@configclass
-class T1SceneCfg(InteractiveSceneCfg):
-    """最终修正版：彻底解决 Unknown asset config type 报错"""
-
-    # 1. 地面配置：直接在 spawn 内部定义材质
-    ground = AssetBaseCfg(
-        prim_path="/World/ground",
-        spawn=sim_utils.GroundPlaneCfg(
-            physics_material=sim_utils.RigidBodyMaterialCfg(
-                static_friction=1.0,
-                dynamic_friction=1.0,
-                restitution=0.3,
-                friction_combine_mode="average",
-                restitution_combine_mode="average",
-            )
-        ),
-    )
-
-    # 2. 机器人配置
-    robot = ArticulationCfg(
-        prim_path="{ENV_REGEX_NS}/Robot",
-        spawn=sim_utils.UsdFileCfg(
-            usd_path=T1_USD_PATH,
-            activate_contact_sensors=True,
-            rigid_props=sim_utils.RigidBodyPropertiesCfg(
-                disable_gravity=False,
-                max_depenetration_velocity=10.0,
-            ),
-            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-                enabled_self_collisions=True,
-                solver_position_iteration_count=8,
-                solver_velocity_iteration_count=4,
-            ),
-        ),
-        init_state=ArticulationCfg.InitialStateCfg(
-            pos=(0.0, 0.0, 0.4), # 掉落高度
-            joint_pos={".*": 0.0},
-        ),
-        actuators={
-            "t1_joints": ImplicitActuatorCfg(
-                joint_names_expr=[".*"],
-                effort_limit=800.0,  # 翻倍，确保电机有力气
-                velocity_limit=20.0,
-                stiffness=500.0,  # 【关键】从 150 提到 500-800 之间
-                damping=40.0,  # 【关键】从 5 提到 30-50 之间，抑制乱抖
-            ),
-        },
-    )
-
-    contact_sensor = ContactSensorCfg(
-        prim_path="{ENV_REGEX_NS}/Robot/.*",
-        update_period=0.0,
-        history_length=3,
-    )
-
-    # 3. 光照配置
-    light = AssetBaseCfg(
-        prim_path="/World/light",
-        spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
-    )
-
-# ['Trunk', 'H1', 'H2', 'AL1', 'AL2', 'AL3', 'left_hand_link', 'AR1', 'AR2', 'AR3', 'right_hand_link', 'Waist', 'Hip_Pitch_Left', 'Hip_Roll_Left', 'Hip_Yaw_Left', 'Shank_Left', 'Ankle_Cross_Left', 'left_foot_link', 'Hip_Pitch_Right', 'Hip_Roll_Right', 'Hip_Yaw_Right', 'Shank_Right', 'Ankle_Cross_Right', 'right_foot_link']
--- a/rl_game/get_up/train.py
+++ b/rl_game/get_up/train.py
@@ -1,101 +0,0 @@
-import sys
-import os
-import argparse
-
-# 确保能找到项目根目录下的模块
-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-
-from isaaclab.app import AppLauncher
-
-# 1. 配置启动参数
-parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.")
-parser.add_argument("--num_envs", type=int, default=8192, help="起身任务建议并行 4096 即可")
-parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID")
-parser.add_argument("--seed", type=int, default=42, help="随机种子")
-AppLauncher.add_app_launcher_args(parser)
-args_cli = parser.parse_args()
-
-# 2. 启动仿真器（必须在导入其他 isaaclab 模块前）
-app_launcher = AppLauncher(args_cli)
-simulation_app = app_launcher.app
-
-import torch
-import gymnasium as gym
-import yaml
-from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
-from rl_games.torch_runner import Runner
-from rl_games.common import env_configurations, vecenv
-
-# 导入你刚刚修改好的配置类
-# 假设你的文件名是 t1_getup_cfg.py，类名是 T1EnvCfg
-from config.t1_env_cfg import T1EnvCfg
-
-# 3. 注册环境
-gym.register(
-    id="Isaac-T1-GetUp-v0",
-    entry_point="isaaclab.envs:ManagerBasedRLEnv",
-    kwargs={
-        "cfg": T1EnvCfg(),  # 这里会加载你设置的随机旋转、时间惩罚等
-    },
-)
-
-
-def main():
-    # --- 新增：处理 Retrain 参数 ---
-    # 你可以手动指定路径，或者在 argparse 里增加一个 --checkpoint 参数
-    checkpoint_path = os.path.join(os.path.dirname(__file__), "logs/T1_GetUp/nn/T1_GetUp.pth")
-    # 检查模型文件是否存在
-    should_retrain = os.path.exists(checkpoint_path)
-
-    env = gym.make("Isaac-T1-GetUp-v0", num_envs=args_cli.num_envs)
-
-    # 注意：rl_device 必须设置为 args_cli.device (通常是 'cuda:0')
-    wrapped_env = RlGamesVecEnvWrapper(
-        env,
-        rl_device=args_cli.device,
-        clip_obs=5.0,
-        clip_actions=1.0
-    )
-
-    vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)
-
-    env_configurations.register('rlgym', {
-        'vecenv_type': 'as_is',
-        'env_creator': lambda **kwargs: wrapped_env
-    })
-
-    config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
-    with open(config_path, "r") as f:
-        rl_config = yaml.safe_load(f)
-
-    # 设置日志和实验名称
-    rl_game_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
-    log_dir = os.path.join(rl_game_dir, "logs")
-    rl_config['params']['config']['train_dir'] = log_dir
-    rl_config['params']['config']['name'] = "T1_GetUp"
-
-    # --- 关键修改：注入模型路径 ---
-    if should_retrain:
-        print(f"[INFO]: 检测到预训练模型，正在从 {checkpoint_path} 恢复训练...")
-        # rl_games 会读取 config 中的 load_path 进行续训
-        rl_config['params']['config']['load_path'] = checkpoint_path
-    else:
-        print("[INFO]: 未找到预训练模型，将从零开始训练。")
-
-    # 7. 运行训练
-    runner = Runner()
-    runner.load(rl_config)
-
-    runner.run({
-        "train": True,
-        "play": False,
-        # 如果你想强制从某个 checkpoint 开始，也可以在这里传参
-        "checkpoint": checkpoint_path if should_retrain else None,
-        "vec_env": wrapped_env
-    })
-
-    simulation_app.close()
-
-
-if __name__ == "__main__":
-    main()