diff --git a/rl_game/get_up/__init__.py b/rl_game/get_up/__init__.py new file mode 100644 index 0000000..f9ad0fb --- /dev/null +++ b/rl_game/get_up/__init__.py @@ -0,0 +1,13 @@ +import gymnasium as gym + +# 导入你的配置 +from rl_game.get_up.config.t1_env_cfg import T1EnvCfg + +# 注册环境到 Gymnasium +gym.register( + id="Isaac-T1-GetUp-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", # Isaac Lab 统一的强化学习环境入口 + kwargs={ + "cfg": T1EnvCfg(), + }, +) \ No newline at end of file diff --git a/rl_game/get_up/asset/t1/T1_locomotion_base.usd b/rl_game/get_up/asset/t1/T1_locomotion_base.usd new file mode 100644 index 0000000..9787643 Binary files /dev/null and b/rl_game/get_up/asset/t1/T1_locomotion_base.usd differ diff --git a/rl_game/get_up/asset/t1/t1_locomotion_physics.usd b/rl_game/get_up/asset/t1/t1_locomotion_physics.usd new file mode 100644 index 0000000..654f8b7 Binary files /dev/null and b/rl_game/get_up/asset/t1/t1_locomotion_physics.usd differ diff --git a/rl_game/get_up/config/ppo_cfg.yaml b/rl_game/get_up/config/ppo_cfg.yaml new file mode 100644 index 0000000..8ae94f7 --- /dev/null +++ b/rl_game/get_up/config/ppo_cfg.yaml @@ -0,0 +1,60 @@ +params: + seed: 42 + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [512, 256, 128] + activation: relu + d2rl: False + initializer: + name: default + + config: + name: T1_GetUp + env_name: rlgym # Isaac Lab 包装器 + multi_gpu: False + ppo: True + mixed_precision: True + normalize_input: True + normalize_value: True + value_bootstrap: True + num_actors: 16384 # 同时训练的机器人数量 + reward_shaper: + scale_value: 1.0 + normalize_advantage: True + gamma: 0.96 + tau: 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.013 + score_to_win: 20000 + max_epochs: 5000 + 
save_best_after: 50 + save_frequency: 100 + grad_norm: 1.0 + entropy_coef: 0.02 + truncate_grads: True + bounds_loss_coef: 0.001 + e_clip: 0.2 + horizon_length: 32 + minibatch_size: 16384 + mini_epochs: 4 + critic_coef: 2 + clip_value: True \ No newline at end of file diff --git a/rl_game/get_up/config/t1_env_cfg.py b/rl_game/get_up/config/t1_env_cfg.py new file mode 100644 index 0000000..3a4b86c --- /dev/null +++ b/rl_game/get_up/config/t1_env_cfg.py @@ -0,0 +1,116 @@ +from isaaclab.assets import ArticulationCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.envs.mdp import JointPositionActionCfg +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass +from rl_game.get_up.env.t1_env import T1SceneCfg + +import isaaclab.envs.mdp as mdp + + +@configclass +class T1ObservationCfg: + """观察值:必须包含姿态和高度,否则机器人不知道自己是怎么躺着的""" + + @configclass + class PolicyCfg(ObsGroup): + concatenate_terms = True + enable_corruption = False + + base_lin_vel = ObsTerm(func=mdp.base_lin_vel) + base_ang_vel = ObsTerm(func=mdp.base_ang_vel) + projected_gravity = ObsTerm(func=mdp.projected_gravity) + # ⬅️ 新增:让机器人知道自己离地多高 + root_height = ObsTerm(func=mdp.root_height) + joint_pos = ObsTerm(func=mdp.joint_pos_rel) + joint_vel = ObsTerm(func=mdp.joint_vel_rel) + actions = ObsTerm(func=mdp.last_action) + + policy = PolicyCfg() + + +@configclass +class T1EventCfg: + """事件配置:实现多种随机躺下姿态的关键""" + + # 每次重置时,随机给基座一个旋转角度 + # 包括:背躺 (Pitch=1.57), 趴着 (Pitch=-1.57), 侧躺 (Roll=1.57) + reset_robot_rotation = EventTerm( + func=mdp.reset_root_custom_orientation, + params={ + "euler_angles_range": { + "roll": (-1.57, 1.57), # 覆盖侧躺 + "pitch": (-1.57, 1.57), # 覆盖趴下和背躺 + "yaw": 
(-3.14, 3.14), # 随机朝向 + }, + "asset_cfg": SceneEntityCfg("robot"), + }, + mode="reset", + ) + + +@configclass +class T1GetUpRewardCfg: + """起身任务专用奖励:快、直、稳""" + + # 1. 核心进度:高度惩罚(负权重) + # 只要低于目标高度(0.65m),每一步都会扣分;起身越快,累计扣分越少 + height_progress = RewTerm( + func=mdp.root_height_below_minimum, + weight=-15.0, + params={"minimum_height": 0.65} + ) + + # 2. 速度惩罚:时间就是金钱 + # 每一帧都扣 0.5 分,起身越慢,被扣掉的总分就越多 + time_penalty = RewTerm( + func=mdp.is_alive, + weight=-0.5 + ) + + # 3. 姿态奖励:上半身必须垂直向上 + upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0) + + # 4. 平滑惩罚:防止起身时由于急躁导致的剧烈抖动 + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01) + joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001) + + +@configclass +class T1TerminationsCfg: + """终止条件""" + # 提前成功:如果站起来了并保持稳定,直接结束 Episode 并拿奖励,节省算力 + is_standing_success = DoneTerm( + func=mdp.root_height_below_minimum, + params={"minimum_height": 0.65, "inverted": True} # 高于 0.65m 则重置 + ) + time_out = DoneTerm(func=mdp.time_out) + + +@configclass +class T1EnvCfg(ManagerBasedRLEnvCfg): + """主环境配置:针对 T1 起身任务""" + scene = T1SceneCfg(num_envs=16384, env_spacing=2.5) + + # 覆盖初始位置:必须低高度生成,防止随机旋转后高空坠落 + def __post_init__(self): + super().__post_init__() + self.scene.robot.init_state.pos = (0.0, 0.0, 0.2) + + observations = T1ObservationCfg() + rewards = T1GetUpRewardCfg() + terminations = T1TerminationsCfg() + events = T1EventCfg() # ⬅️ 挂载随机姿态事件 + + actions = JointPositionActionCfg( + asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True + ) + + # 训练参数 + episode_length_s = 5.0 # 强制 5秒内必须站起来 + decimation = 4 # 提高控制频率 (约 50Hz) 有助于精细动作 \ No newline at end of file diff --git a/rl_game/get_up/env/t1_env.py b/rl_game/get_up/env/t1_env.py new file mode 100644 index 0000000..1cfda5d --- /dev/null +++ b/rl_game/get_up/env/t1_env.py @@ -0,0 +1,60 @@ +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.actuators import 
ImplicitActuatorCfg +from isaaclab import sim as sim_utils + +import os + +_DEMO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd") + +@configclass +class T1SceneCfg(InteractiveSceneCfg): + """修改后的 T1 机器人场景配置,适配随机姿态起身任务""" + + # 1. 地面配置 (如果之前没定义,必须加上) + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + ) + + # 2. 机器人配置 + robot = ArticulationCfg( + prim_path="{ENV_REGEX_NS}/Robot", + spawn=sim_utils.UsdFileCfg( + usd_path=T1_USD_PATH, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=10.0, # ⬅️ 提高穿透修正速度,防止肢体卡在地下 + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=True, # 开启自碰撞,起身时肢体可能互碰 + solver_position_iteration_count=8, # ⬅️ 增加迭代次数,提高物理稳定性 + solver_velocity_iteration_count=4, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + # ⬅️ 核心修改:高度降低。因为是躺着生成,0.2m 比较合适 + pos=(0.0, 0.0, 0.2), + # 默认旋转设为单位阵,具体的随机化由 Event 管理器处理 + rot=(1.0, 0.0, 0.0, 0.0), + joint_pos={".*": 0.0}, + ), + actuators={ + "legs": ImplicitActuatorCfg( + joint_names_expr=[".*"], + effort_limit=400.0, # T1 通常有较强的力矩输出 + velocity_limit=20.0, # ⬅️ 调高速度限制,起身需要爆发动作 + stiffness=150.0, # ⬅️ 调高刚度:起身需要更“硬”的支撑 + damping=5.0, # ⬅️ 增加阻尼:防止机器人站起瞬间晃动太厉害 + ), + }, + ) + + # 3. 光照配置 + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0), + ) \ No newline at end of file diff --git a/rl_game/get_up/train.py b/rl_game/get_up/train.py new file mode 100644 index 0000000..065b87c --- /dev/null +++ b/rl_game/get_up/train.py @@ -0,0 +1,92 @@ +import sys +import os +import argparse + +# 确保能找到项目根目录下的模块 +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +from isaaclab.app import AppLauncher + +# 1. 
配置启动参数 +parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.") +parser.add_argument("--num_envs", type=int, default=16384, help="并行环境数量(应与 ppo_cfg.yaml 的 num_actors 保持一致)") +parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID") +parser.add_argument("--seed", type=int, default=42, help="随机种子") +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() + +# 2. 启动仿真器(必须在导入其他 isaaclab 模块前) +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +import torch +import gymnasium as gym +import yaml +from isaaclab_rl.rl_games import RlGamesVecEnvWrapper +from rl_games.torch_runner import Runner +from rl_games.common import env_configurations, vecenv + +# 导入你刚刚修改好的配置类 +# 文件为 config/t1_env_cfg.py,类名是 T1EnvCfg +from config.t1_env_cfg import T1EnvCfg + +# 3. 注册环境 +gym.register( + id="Isaac-T1-GetUp-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "cfg": T1EnvCfg(), # 这里会加载你设置的随机旋转、时间惩罚等 + }, +) + + +def main(): + # 4. 创建环境,显式传入命令行指定的 num_envs + env = gym.make("Isaac-T1-GetUp-v0", num_envs=args_cli.num_envs) + + # 5. 包装环境 + # 注意:rl_device 必须设置为 args_cli.device (通常是 'cuda:0') + wrapped_env = RlGamesVecEnvWrapper( + env, + rl_device=args_cli.device, + clip_obs=5.0, + clip_actions=1.0 # 动作裁剪建议设小一点,防止电机输出瞬间爆表 + ) + + # 注册给 rl_games 使用 + vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env) + + env_configurations.register('rlgym', { + 'vecenv_type': 'as_is', + 'env_creator': lambda **kwargs: wrapped_env + }) + + # 6. 
加载 PPO 配置文件 + # 提示:由于是起身任务,建议在 ppo_cfg.yaml 中调大 minibatch 大小或提高学习率 + config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml") + with open(config_path, "r") as f: + rl_config = yaml.safe_load(f) + + # 设置日志和实验名称 + rl_game_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".")) + log_dir = os.path.join(rl_game_dir, "logs") + rl_config['params']['config']['train_dir'] = log_dir + rl_config['params']['config']['name'] = "T1_GetUp_Experiment" + + # 7. 运行训练 + runner = Runner() + runner.load(rl_config) + + print(f"[INFO]: 开始训练任务 {args_cli.task},环境数量: {args_cli.num_envs}") + + runner.run({ + "train": True, + "play": False, + "vec_env": wrapped_env + }) + + simulation_app.close() + + +if __name__ == "__main__": + main() \ No newline at end of file