diff --git a/rl_game/demo/__init__.py b/rl_game/demo/__init__.py
new file mode 100644
index 0000000..181dd44
--- /dev/null
+++ b/rl_game/demo/__init__.py
@@ -0,0 +1,13 @@
+import gymnasium as gym
+
+# Import your environment configuration
+from rl_game.demo.config.t1_env_cfg import T1EnvCfg
+
+# Register the environment with Gymnasium
+gym.register(
+    id="Isaac-T1-Walking-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",  # Isaac Lab's unified RL environment entry point
+    kwargs={
+        "cfg": T1EnvCfg(),
+    },
+)
\ No newline at end of file
diff --git a/rl_game/demo/asset/t1/T1_locomotion_base.usd b/rl_game/demo/asset/t1/T1_locomotion_base.usd
new file mode 100644
index 0000000..9787643
Binary files /dev/null and b/rl_game/demo/asset/t1/T1_locomotion_base.usd differ
diff --git a/rl_game/demo/asset/t1/t1_locomotion_physics.usd b/rl_game/demo/asset/t1/t1_locomotion_physics.usd
new file mode 100644
index 0000000..654f8b7
Binary files /dev/null and b/rl_game/demo/asset/t1/t1_locomotion_physics.usd differ
diff --git a/rl_game/demo/config/ppo_cfg.yaml b/rl_game/demo/config/ppo_cfg.yaml
new file mode 100644
index 0000000..3c94b6c
--- /dev/null
+++ b/rl_game/demo/config/ppo_cfg.yaml
@@ -0,0 +1,60 @@
+params:
+  seed: 42
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [512, 256, 128]
+      activation: elu
+      d2rl: False
+      initializer:
+        name: default
+
+  config:
+    name: T1_Walking
+    env_name: rlgym  # the wrapped Isaac Lab environment registered in train.py/play.py
+    multi_gpu: False
+    ppo: True
+    mixed_precision: True
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: True
+    num_actors: 16384  # number of robots trained simultaneously
+    reward_shaper:
+      scale_value: 1.0
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    score_to_win: 20000
+    max_epochs: 5000
+    save_best_after: 50
+    save_frequency: 100
+    grad_norm: 1.0
+    entropy_coef: 0.01
+    truncate_grads: True
+    bounds_loss_coef: 0.0
+    e_clip: 0.2
+    horizon_length: 128
+    minibatch_size: 32768
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: True
\ No newline at end of file
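Note on the batch sizing in ppo_cfg.yaml: with num_actors: 16384 and horizon_length: 128, each PPO epoch collects 16384 * 128 = 2,097,152 transitions, which minibatch_size: 32768 splits into exactly 64 minibatches, each revisited mini_epochs: 5 times. If you launch with a different --num_envs, num_actors must match the actual env count, and num_actors * horizon_length should remain divisible by minibatch_size (train.py below syncs num_actors for you).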
diff --git a/rl_game/demo/config/t1_env_cfg.py b/rl_game/demo/config/t1_env_cfg.py
new file mode 100644
index 0000000..3fbfe88
--- /dev/null
+++ b/rl_game/demo/config/t1_env_cfg.py
@@ -0,0 +1,96 @@
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.envs.mdp import JointPositionActionCfg
+import isaaclab.envs.mdp as mdp
+from isaaclab.utils import configclass
+from rl_game.demo.env.t1_env import T1SceneCfg
+
+@configclass
+class T1ObservationCfg:
+    """Observation-space configuration container."""
+
+    @configclass
+    class PolicyCfg(ObsGroup):
+        concatenate_terms = True
+        enable_corruption = False
+
+        # Note: pass the mdp functions directly, not quoted strings
+        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
+        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
+        projected_gravity = ObsTerm(func=mdp.projected_gravity)
+        joint_pos = ObsTerm(func=mdp.joint_pos_rel)
+        joint_vel = ObsTerm(func=mdp.joint_vel_rel)
+        actions = ObsTerm(func=mdp.last_action)
+        # The policy must observe the commanded velocity to be able to track it
+        velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"})
+
+    policy = PolicyCfg()
+
+@configclass
+class T1ActionCfg:
+    """Action-space configuration."""
+    joint_pos = JointPositionActionCfg(
+        asset_name="robot",  # must match the robot's attribute name in the scene config
+        joint_names=[".*"],  # control every joint
+        scale=0.5,  # scales the network output
+        use_default_offset=True  # actions are offsets from the default joint angles (the zeros in init_state)
+    )
+
+@configclass
+class T1TerminationsCfg:
+    """Termination conditions: when to reset an environment."""
+    # 1. Reset on falling: trunk height drops below 0.35 m (assuming the T1 hip sits at 0.7 m)
+    base_height_too_low = DoneTerm(
+        func=mdp.root_height_below_minimum,
+        params={"minimum_height": 0.35},
+    )
+    # 2. Episode time limit (timeout)
+    time_out = DoneTerm(func=mdp.time_out)
+
+@configclass
+class T1CommandsCfg:
+    """Command configuration: the robot's target velocity."""
+    base_velocity = mdp.UniformVelocityCommandCfg(
+        asset_name="robot",
+        resampling_time_range=(10.0, 10.0),
+        ranges=mdp.UniformVelocityCommandCfg.Ranges(
+            lin_vel_x=(0.5, 1.5),
+            lin_vel_y=(0.0, 0.0),
+            ang_vel_z=(-0.1, 0.1),
+        ),
+    )
+
+@configclass
+class T1RewardCfg:
+    """Reward configuration: encourage walking forward, penalize falling and excessive effort."""
+    # Velocity-tracking reward (the command points along the X axis)
+    track_lin_vel_xy_exp = RewTerm(
+        func=mdp.track_lin_vel_xy_exp,
+        weight=1.0,
+        params={
+            "std": 0.5,
+            "command_name": "base_velocity"
+        }
+    )
+    # Posture penalty (keep the upper body upright); negative weight, since flat_orientation_l2 grows with tilt
+    upright = RewTerm(func=mdp.flat_orientation_l2, weight=-0.1)
+    # Action-smoothness penalty
+    action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
+
+@configclass
+class T1EnvCfg(ManagerBasedRLEnvCfg):
+    """Main environment configuration."""
+    # Scene setup
+    scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)
+    # Observations and rewards
+    observations = T1ObservationCfg()
+    rewards = T1RewardCfg()
+    terminations = T1TerminationsCfg()
+    actions = T1ActionCfg()
+    commands = T1CommandsCfg()
+    episode_length_s = 20.0
+    # Default stepping
+    decimation = 6  # ratio of simulation frequency to control frequency
\ No newline at end of file
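Note on the tracking term above: Isaac Lab's mdp.track_lin_vel_xy_exp computes exp(-||v_cmd,xy - v_xy||^2 / std^2) on the base-frame planar velocity, so the reward approaches 1 as the robot matches the commanded velocity and decays smoothly with the squared error; std: 0.5 sets how forgiving that decay is.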
diff --git a/rl_game/demo/env/t1_env.py b/rl_game/demo/env/t1_env.py
new file mode 100644
index 0000000..12b5c72
--- /dev/null
+++ b/rl_game/demo/env/t1_env.py
@@ -0,0 +1,62 @@
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.utils import configclass
+from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab import sim as sim_utils
+import os
+
+_DEMO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd")
+
+@configclass
+class T1SceneCfg(InteractiveSceneCfg):
+    """Scene configuration for the T1 robot: ground plane, robot, and lighting."""
+
+    # Ground plane
+    ground = AssetBaseCfg(
+        prim_path="/World/ground",
+        spawn=sim_utils.GroundPlaneCfg(),
+    )
+
+    # Dome light so the scene is visible when not running headless
+    dome_light = AssetBaseCfg(
+        prim_path="/World/DomeLight",
+        spawn=sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)),
+    )
+
+    # T1 robot configuration
+    # Note: replace usd_path with the path to your actual T1 robot USD file
+    robot = ArticulationCfg(
+        prim_path="{ENV_REGEX_NS}/Robot",
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=T1_USD_PATH,
+            activate_contact_sensors=True,
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                disable_gravity=False,
+                retain_accelerations=False,
+                linear_damping=0.0,
+                angular_damping=0.0,
+                max_linear_velocity=1000.0,
+                max_angular_velocity=1000.0,
+                max_depenetration_velocity=1.0,
+            ),
+            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+                enabled_self_collisions=True,
+                solver_position_iteration_count=4,
+                solver_velocity_iteration_count=0
+            ),
+        ),
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, 0.0, 0.7),  # initial height, so the feet rest on the ground instead of clipping through it
+            joint_pos={".*": 0.0},  # all joints start at 0
+        ),
+        actuators={
+            "legs": ImplicitActuatorCfg(
+                joint_names_expr=[".*"],  # match every joint; specific names such as ["L_Hip.*", "R_Hip.*"] also work
+                effort_limit=400.0,
+                velocity_limit=10.0,
+                stiffness=85.0,  # P gain
+                damping=2.0,  # D gain
+            ),
+        },
+    )
diff --git a/rl_game/demo/play.py b/rl_game/demo/play.py
new file mode 100644
index 0000000..d07f8ad
--- /dev/null
+++ b/rl_game/demo/play.py
@@ -0,0 +1,63 @@
+import sys
+import os
+# Make sure the current directory (demo) is on sys.path
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+import argparse
+from isaaclab.app import AppLauncher
+
+# Launch arguments; leave headless mode off by default so the robot can be watched
+parser = argparse.ArgumentParser(description="Play trained T1 policy.")
+parser.add_argument("--checkpoint", type=str, help="Path to the trained model checkpoint.")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import torch
+import gymnasium as gym
+from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
+from rl_games.torch_runner import Runner
+from rl_games.common import env_configurations, vecenv
+import yaml
+
+from rl_game.demo.config.t1_env_cfg import T1EnvCfg
+
+gym.register(
+    id="Isaac-T1-Walking-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",  # Isaac Lab's unified RL environment entry point
+    kwargs={
+        "cfg": T1EnvCfg(),
+    },
+)
+
+
+def main():
+    # 1. Create the environment (a single robot is enough for a demo)
+    env_cfg = T1EnvCfg()
+    env_cfg.scene.num_envs = 1
+    env = gym.make("Isaac-T1-Walking-v0", cfg=env_cfg, render_mode="rgb_array")
+    wrapped_env = RlGamesVecEnvWrapper(env, rl_device=args_cli.device, clip_obs=5.0, clip_actions=100.0)
+
+    # Expose the wrapped env to rl_games under the 'rlgym' name used in ppo_cfg.yaml
+    vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)
+    env_configurations.register('rlgym', {
+        'vecenv_type': 'as_is',
+        'env_creator': lambda **kwargs: wrapped_env
+    })
+
+    # 2. Load the config and run inference
+    config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
+    with open(config_path, "r") as f:
+        rl_config = yaml.safe_load(f)
+
+    runner = Runner()
+    runner.load(rl_config)
+    # Run in inference mode; rl_games restores the weights from the checkpoint path
+    runner.run({"train": False, "play": True, "checkpoint": args_cli.checkpoint})
+
+    simulation_app.close()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
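To watch a trained policy, point play.py at a checkpoint. rl_games writes checkpoints under the train_dir set in train.py, inside an experiment folder whose name it generates (typically <train_dir>/<experiment>/nn/<name>.pth), so the path below is illustrative — check rl_game/demo/logs/ for the real one:

    PYTHONPATH=. python rl_game/demo/play.py --checkpoint rl_game/demo/logs/T1_Walking/nn/T1_Walking.pth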
diff --git a/rl_game/demo/train.py b/rl_game/demo/train.py
new file mode 100644
index 0000000..58bece0
--- /dev/null
+++ b/rl_game/demo/train.py
@@ -0,0 +1,87 @@
+import sys
+import os
+# Key: the current directory must be on sys.path so that `from config import ...` works
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+import argparse
+from isaaclab.app import AppLauncher
+
+# Launch arguments
+parser = argparse.ArgumentParser(description="Train T1 robot with rl_games.")
+parser.add_argument("--num_envs", type=int, default=16384, help="Number of envs to run.")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+# Start the simulator
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import torch
+import gymnasium as gym
+from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
+from rl_games.torch_runner import Runner
+import yaml
+from config.t1_env_cfg import T1EnvCfg
+from rl_games.common import env_configurations, vecenv
+
+gym.register(
+    id="Isaac-T1-Walking-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",  # Isaac Lab's unified RL environment entry point
+    kwargs={
+        "cfg": T1EnvCfg(),
+    },
+)
+
+def main():
+    # 1. Create the environment; the env count lives on the scene config
+    env_cfg = T1EnvCfg()
+    env_cfg.scene.num_envs = args_cli.num_envs
+    env = gym.make("Isaac-T1-Walking-v0", cfg=env_cfg)
+
+    # 2. Wrap the environment for rl_games
+    wrapped_env = RlGamesVecEnvWrapper(
+        env,
+        rl_device=args_cli.device,
+        clip_obs=5.0,
+        clip_actions=100.0
+    )
+
+    vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)
+
+    # Register the environment configuration
+    env_configurations.register('rlgym', {
+        'vecenv_type': 'as_is',
+        'env_creator': lambda **kwargs: wrapped_env
+    })
+
+    # 3. Load the PPO config
+    config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
+    with open(config_path, "r") as f:
+        rl_config = yaml.safe_load(f)
+
+    # Keep num_actors in sync with the actual number of envs
+    rl_config['params']['config']['num_actors'] = args_cli.num_envs
+
+    # Set the log directory
+    rl_game_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
+    log_dir = os.path.join(rl_game_dir, "logs")
+    rl_config['params']['config']['train_dir'] = log_dir
+
+    # 4. Start training
+    runner = Runner()
+
+    # rl_config holds only plain text and numbers here, so the deepcopy inside rl_games succeeds
+    runner.load(rl_config)
+
+    # The env itself reaches rl_games through the registered 'as_is' vecenv; run() only needs the mode flags
+    runner.run({
+        "train": True,
+        "play": False
+    })
+
+    simulation_app.close()
+
+# PYTHONPATH=. python rl_game/demo/train.py
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
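A typical training launch (--headless comes from AppLauncher.add_app_launcher_args and disables rendering for speed; lower --num_envs if the default 16384 does not fit on your GPU):

    PYTHONPATH=. python rl_game/demo/train.py --num_envs 4096 --headless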