Change reward: add penalty terms for joint_vel and root_vel_z (root_vel_z_penalty)

This commit is contained in:
2026-03-17 05:54:20 -04:00
parent c1e3d9382f
commit 4933567ef8
2 changed files with 102 additions and 47 deletions

View File

@@ -41,19 +41,22 @@ gym.register(
def main():
# 4. 创建环境,显式传入命令行指定的 num_envs
# --- 新增:处理 Retrain 参数 ---
# 你可以手动指定路径,或者在 argparse 里增加一个 --checkpoint 参数
checkpoint_path = os.path.join(os.path.dirname(__file__), "logs/T1_GetUp/nn/T1_GetUp.pth")
# 检查模型文件是否存在
should_retrain = os.path.exists(checkpoint_path)
env = gym.make("Isaac-T1-GetUp-v0", num_envs=args_cli.num_envs)
# 5. 包装环境
# 注意rl_device 必须设置为 args_cli.device (通常是 'cuda:0')
wrapped_env = RlGamesVecEnvWrapper(
env,
rl_device=args_cli.device,
clip_obs=5.0,
clip_actions=1.0 # 动作裁剪建议设小一点,防止电机输出瞬间爆表
clip_actions=1.0
)
# 注册给 rl_games 使用
vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)
env_configurations.register('rlgym', {
@@ -61,8 +64,6 @@ def main():
'env_creator': lambda **kwargs: wrapped_env
})
# 6. 加载 PPO 配置文件
# 提示:由于是起身任务,建议在 ppo_cfg.yaml 中调大 mini_batch 大小或提高学习率
config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
with open(config_path, "r") as f:
rl_config = yaml.safe_load(f)
@@ -73,15 +74,23 @@ def main():
rl_config['params']['config']['train_dir'] = log_dir
rl_config['params']['config']['name'] = "T1_GetUp"
# --- 关键修改:注入模型路径 ---
if should_retrain:
print(f"[INFO]: 检测到预训练模型,正在从 {checkpoint_path} 恢复训练...")
# rl_games 会读取 config 中的 load_path 进行续训
rl_config['params']['config']['load_path'] = checkpoint_path
else:
print("[INFO]: 未找到预训练模型,将从零开始训练。")
# 7. 运行训练
runner = Runner()
runner.load(rl_config)
print(f"[INFO]: 开始训练任务 {args_cli.task},环境数量: {args_cli.num_envs}")
runner.run({
"train": True,
"play": False,
# 如果你想强制从某个 checkpoint 开始,也可以在这里传参
"checkpoint": checkpoint_path if should_retrain else None,
"vec_env": wrapped_env
})