Demo of the get-up task

This commit is contained in:
2026-03-16 05:00:20 -04:00
parent 1b4c0913b7
commit 4b0b1fac8d
7 changed files with 341 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
import gymnasium as gym
# Task configuration for the T1 get-up environment.
# NOTE(review): this imports from `rl_game.demo.config` while the other files in
# this commit live under `rl_game.get_up` — confirm the package path is intended.
from rl_game.demo.config.t1_env_cfg import T1EnvCfg

# Register the task with Gymnasium so it can be created via `gym.make(...)`.
gym.register(
    id="Isaac-T1-GetUp-v0",
    # Isaac Lab's unified manager-based RL environment entry point.
    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    kwargs={
        "cfg": T1EnvCfg(),
    },
)

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,60 @@
# rl_games PPO configuration for the T1 get-up task.
params:
  seed: 42

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [512, 256, 128]
      activation: relu
      d2rl: False
      initializer:
        name: default

  config:
    # NOTE(review): name says "Walking" although this config ships with the
    # get-up task; train.py overwrites it with "T1_GetUp_Experiment" at runtime.
    name: T1_Walking
    env_name: rlgym  # key registered by the Isaac Lab wrapper in train.py
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: 16384  # number of robots simulated in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.96
    tau: 0.95
    learning_rate: 5e-4
    lr_schedule: adaptive
    kl_threshold: 0.013
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0
    entropy_coef: 0.02
    truncate_grads: True
    bounds_loss_coef: 0.001
    e_clip: 0.2
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 4
    critic_coef: 2
    clip_value: True

View File

@@ -0,0 +1,116 @@
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg
import isaaclab.envs.mdp as mdp
@configclass
class T1ObservationCfg:
    """Observation spec: pose and height terms are required so the policy can
    tell how the robot is lying on the ground."""

    @configclass
    class PolicyCfg(ObsGroup):
        # Stack every term into one flat observation vector, no noise injection.
        concatenate_terms = True
        enable_corruption = False

        # Term order is the concatenation order — do not reorder.
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        # Tells the policy how high the base is off the ground.
        # NOTE(review): confirm `mdp.root_height` exists in this Isaac Lab version.
        root_height = ObsTerm(func=mdp.root_height)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel)
        joint_vel = ObsTerm(func=mdp.joint_vel_rel)
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()
@configclass
class T1EventCfg:
    """Event spec: the key to training from many random lying-down poses."""

    # On every reset, sample a random base orientation. The ranges cover
    # supine (pitch=+1.57), prone (pitch=-1.57) and side-lying (roll=±1.57).
    reset_robot_rotation = EventTerm(
        # NOTE(review): verify `reset_root_custom_orientation` exists in mdp.
        func=mdp.reset_root_custom_orientation,
        mode="reset",
        params={
            "euler_angles_range": {
                "roll": (-1.57, 1.57),   # covers side-lying
                "pitch": (-1.57, 1.57),  # covers prone and supine
                "yaw": (-3.14, 3.14),    # random heading
            },
            "asset_cfg": SceneEntityCfg("robot"),
        },
    )
@configclass
class T1GetUpRewardCfg:
    """Rewards for the get-up task: stand up fast, upright, and smoothly."""

    # 1. Core progress term tied to base height vs. the 0.65 m target.
    # NOTE(review): `root_height_below_minimum` is a penalty-shaped term (it is
    # non-zero while the base is BELOW the minimum), so a positive weight would
    # reward staying down — confirm the sign, or that a custom variant is used.
    height_progress = RewTerm(
        func=mdp.root_height_below_minimum,
        weight=15.0,
        params={"minimum_height": 0.65},
    )
    # 2. Constant per-step penalty: the longer the robot takes, the more it loses.
    time_penalty = RewTerm(func=mdp.is_alive, weight=-0.5)
    # 3. Upright posture term.
    # NOTE(review): `flat_orientation_l2` grows when the base is TILTED; it is
    # normally used with a negative weight — confirm the +2.0 sign.
    upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0)
    # 4. Smoothness penalties: damp jerky motion while standing up.
    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
    joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001)
@configclass
class T1TerminationsCfg:
    """Termination spec for the get-up task."""

    # Early success: once the robot is up and high enough, end the episode
    # immediately to save compute.
    # NOTE(review): the stock `root_height_below_minimum` termination takes no
    # "inverted" kwarg — confirm a custom variant is provided.
    is_standing_success = DoneTerm(
        func=mdp.root_height_below_minimum,
        params={"minimum_height": 0.65, "inverted": True},  # reset when above 0.65 m
    )
    time_out = DoneTerm(func=mdp.time_out)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Top-level environment configuration for the T1 get-up task."""

    scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)
    observations = T1ObservationCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1TerminationsCfg()
    events = T1EventCfg()  # attaches the random lying-pose reset event
    actions = JointPositionActionCfg(
        asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
    )

    # Training parameters.
    episode_length_s = 5.0  # the robot must be standing within 5 seconds
    decimation = 4  # ~50 Hz control; helps fine motor actions

    def __post_init__(self):
        super().__post_init__()
        # Override the spawn position: start close to the ground so the
        # randomized rotation cannot cause a high-altitude drop.
        self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)

60
rl_game/get_up/env/t1_env.py vendored Normal file
View File

@@ -0,0 +1,60 @@
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab import sim as sim_utils
import os
_DEMO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd")
@configclass
class T1SceneCfg(InteractiveSceneCfg):
    """Scene configuration for the T1 robot, tuned for the random-pose get-up task."""

    # 1. Ground plane (required — the robot starts lying on it).
    ground = AssetBaseCfg(
        prim_path="/World/ground",
        spawn=sim_utils.GroundPlaneCfg(),
    )

    # 2. Robot.
    robot = ArticulationCfg(
        prim_path="{ENV_REGEX_NS}/Robot",
        spawn=sim_utils.UsdFileCfg(
            usd_path=T1_USD_PATH,
            activate_contact_sensors=True,
            rigid_props=sim_utils.RigidBodyPropertiesCfg(
                disable_gravity=False,
                # Raised depenetration speed keeps limbs from sticking in the ground.
                max_depenetration_velocity=10.0,
            ),
            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
                # Limbs may touch each other while standing up.
                enabled_self_collisions=True,
                # Extra solver iterations for physical stability.
                solver_position_iteration_count=8,
                solver_velocity_iteration_count=4,
            ),
        ),
        init_state=ArticulationCfg.InitialStateCfg(
            # Lowered spawn height: 0.2 m suits a robot spawned lying down.
            pos=(0.0, 0.0, 0.2),
            # Identity rotation; the event manager randomizes the actual pose.
            rot=(1.0, 0.0, 0.0, 0.0),
            joint_pos={".*": 0.0},
        ),
        actuators={
            "legs": ImplicitActuatorCfg(
                joint_names_expr=[".*"],
                effort_limit=400.0,   # T1 has fairly strong torque output
                velocity_limit=20.0,  # raised: getting up needs explosive motion
                stiffness=150.0,      # stiffer support while pushing off the ground
                damping=5.0,          # damping against oscillation right after standing
            ),
        },
    )

    # 3. Lighting.
    light = AssetBaseCfg(
        prim_path="/World/light",
        spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
    )

92
rl_game/get_up/train.py Normal file
View File

@@ -0,0 +1,92 @@
import sys
import os
import argparse

# Make modules under the project root importable.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from isaaclab.app import AppLauncher

# 1. Command-line arguments.
parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.")
parser.add_argument("--num_envs", type=int, default=16384, help="起身任务建议并行 4096 即可")
parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID")
parser.add_argument("--seed", type=int, default=42, help="随机种子")
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()

# 2. Launch the simulator (must happen before importing any other isaaclab module).
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
import torch
import gymnasium as gym
import yaml

from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
from rl_games.torch_runner import Runner
from rl_games.common import env_configurations, vecenv

# Environment configuration (config/t1_env_cfg.py, class T1EnvCfg).
from config.t1_env_cfg import T1EnvCfg

# 3. Register the task. The config carries the random lying-pose events,
# the per-step time penalty, and the rest of the get-up settings.
gym.register(
    id="Isaac-T1-GetUp-v0",
    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    kwargs={
        "cfg": T1EnvCfg(),
    },
)
def main():
    """Create, wrap, and train the T1 get-up environment with rl_games."""
    # 4. Create the environment with the CLI-requested env count.
    env = gym.make("Isaac-T1-GetUp-v0", num_envs=args_cli.num_envs)

    # 5. Wrap for rl_games. `rl_device` must match the launcher device
    # (typically 'cuda:0').
    wrapped_env = RlGamesVecEnvWrapper(
        env,
        rl_device=args_cli.device,
        clip_obs=5.0,
        clip_actions=1.0,  # small clip guards against motor output spikes
    )

    # Expose the wrapped env to rl_games under the 'rlgym' key used by the config.
    vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)
    env_configurations.register('rlgym', {
        'vecenv_type': 'as_is',
        'env_creator': lambda **kwargs: wrapped_env
    })

    # 6. Load the PPO configuration file.
    config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
    with open(config_path, "r") as f:
        rl_config = yaml.safe_load(f)

    # Point the logs at ./logs and name the experiment.
    rl_game_dir = os.path.dirname(os.path.abspath(__file__))
    log_dir = os.path.join(rl_game_dir, "logs")
    rl_config['params']['config']['train_dir'] = log_dir
    rl_config['params']['config']['name'] = "T1_GetUp_Experiment"

    # 7. Run training.
    runner = Runner()
    runner.load(rl_config)
    print(f"[INFO]: 开始训练任务 {args_cli.task},环境数量: {args_cli.num_envs}")
    runner.run({
        "train": True,
        "play": False,
        "vec_env": wrapped_env
    })

    # Shut down the simulator once training finishes.
    simulation_app.close()


if __name__ == "__main__":
    main()