3 Commits

Author SHA1 Message Date
02c06c23ad add some codes to make retain come true 2026-03-17 05:56:26 -04:00
1b4c0913b7 delete play.py commit it in run 2026-03-16 04:08:42 -04:00
f0a5f8f4b7 The example of train-T1 2026-03-15 20:14:06 -04:00
7 changed files with 318 additions and 0 deletions

13
rl_game/demo/__init__.py Normal file
View File

@@ -0,0 +1,13 @@
import gymnasium as gym
# Import the environment configuration for the T1 walking task.
from rl_game.demo.config.t1_env_cfg import T1EnvCfg

# Register the environment with Gymnasium so `gym.make("Isaac-T1-Walking-v0")` works.
# NOTE(review): train.py registers a different ID ("Isaac-T1-Walk-v0") for the
# same config — confirm which ID is the intended public one.
gym.register(
    id="Isaac-T1-Walking-v0",
    entry_point="isaaclab.envs:ManagerBasedRLEnv",  # Isaac Lab's unified manager-based RL env entry point
    kwargs={
        "cfg": T1EnvCfg(),
    },
)

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,60 @@
# rl_games PPO (a2c_continuous) training configuration for the T1 walking task.
params:
  seed: 42
  algo:
    name: a2c_continuous
  model:
    name: continuous_a2c_logstd
  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [512, 256, 128]
      activation: elu
      d2rl: False
      initializer:
        name: default
  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab vec-env wrapper registered under this key in train.py
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: 16384  # number of robots simulated/trained in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: True
    bounds_loss_coef: 0.0
    e_clip: 0.2
    horizon_length: 128
    minibatch_size: 32768
    mini_epochs: 5
    critic_coef: 2
    clip_value: True

View File

@@ -0,0 +1,94 @@
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.envs.mdp import JointPositionActionCfg
import isaaclab.envs.mdp as mdp
from isaaclab.utils import configclass
from rl_game.demo.env.t1_env import T1SceneCfg
@configclass
class T1ObservationCfg:
    """Observation-space configuration container."""

    @configclass
    class PolicyCfg(ObsGroup):
        """Observations fed to the policy, concatenated into one flat vector."""

        concatenate_terms = True   # flatten all terms into a single vector
        enable_corruption = False  # no observation noise/corruption

        # Pass the mdp functions directly (by reference, not by string name).
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel)
        joint_vel = ObsTerm(func=mdp.joint_vel_rel)
        actions = ObsTerm(func=mdp.last_action)

    # The "policy" observation group (the only group defined here).
    policy = PolicyCfg()
@configclass
class T1ActionCfg:
    """Action-space configuration."""

    joint_pos = JointPositionActionCfg(
        asset_name="robot",       # must match the robot's name in the scene config
        joint_names=[".*"],       # control every joint
        scale=0.5,                # scaling applied to the raw network output
        use_default_offset=True   # actions are offsets from default joint angles (0 in init_state)
    )
@configclass
class T1TerminationsCfg:
    """Termination conditions: when an environment instance gets reset."""

    # 1. Fall detection: reset if the base drops below 0.35 m
    #    (assuming the T1 pelvis sits at ~0.7 m when standing).
    base_height_too_low = DoneTerm(
        func=mdp.root_height_below_minimum,
        params={"minimum_height": 0.35},
    )
    # 2. Episode time limit (timeout).
    time_out = DoneTerm(func=mdp.time_out)
@configclass
class T1CommandsCfg:
    """Command configuration: target base velocities the robot must track."""

    base_velocity = mdp.UniformVelocityCommandCfg(
        asset_name="robot",
        resampling_time_range=(10.0, 10.0),  # resample the command every 10 s
        ranges=mdp.UniformVelocityCommandCfg.Ranges(
            lin_vel_x=(0.5, 1.5),    # forward-speed command range
            lin_vel_y=(0.0, 0.0),    # no lateral motion commanded
            ang_vel_z=(-0.1, 0.1),   # small yaw-rate commands
        ),
    )
@configclass
class T1RewardCfg:
    """Reward configuration: encourage tracking the commanded velocity,
    penalize tilted posture and jerky actions."""

    # Velocity-tracking reward for the commanded planar base velocity.
    track_lin_vel_xy_exp = RewTerm(
        func=mdp.track_lin_vel_xy_exp,
        weight=1.0,
        params={
            "std": 0.5,
            "command_name": "base_velocity"
        }
    )
    # Upright-posture penalty. flat_orientation_l2 returns an L2 *penalty*
    # magnitude (larger when tilted), so its weight must be negative.
    # BUG FIX: the original weight was +0.1, which rewarded tilting — the
    # opposite of the stated intent ("keep the upper body upright"); compare
    # the sibling penalty action_rate_l2, which correctly uses -0.01.
    upright = RewTerm(func=mdp.flat_orientation_l2, weight=-0.1)
    # Action-smoothness penalty (rate of change of consecutive actions).
    action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Top-level environment configuration for the T1 walking task."""

    # Scene setup (robot articulation and env replication).
    scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)
    # Manager configurations.
    observations = T1ObservationCfg()
    rewards = T1RewardCfg()
    terminations = T1TerminationsCfg()
    actions = T1ActionCfg()
    commands = T1CommandsCfg()
    # Maximum episode length in seconds (works with the time_out termination).
    episode_length_s = 20.0
    # Control decimation: simulation frequency / control frequency.
    decimation = 6

50
rl_game/demo/env/t1_env.py vendored Normal file
View File

@@ -0,0 +1,50 @@
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.utils import configclass
from isaaclab.actuators import ImplicitActuatorCfg
from isaaclab import sim as sim_utils
import os

# Absolute path to the demo package directory (parent of this env/ module).
_DEMO_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Location of the T1 robot USD asset shipped with the demo.
T1_USD_PATH = os.path.join(_DEMO_DIR, "asset", "t1", "t1_locomotion_physics.usd")
@configclass
class T1SceneCfg(InteractiveSceneCfg):
    """Scene configuration for the T1 robot.

    NOTE(review): the original docstring claims the scene contains ground,
    robot and lighting, but only the robot articulation is defined here —
    confirm the terrain and light assets are added elsewhere.
    """

    # T1 robot articulation.
    # usd_path must point at a valid T1 robot USD file (see T1_USD_PATH above).
    robot = ArticulationCfg(
        prim_path="{ENV_REGEX_NS}/Robot",
        spawn=sim_utils.UsdFileCfg(
            usd_path=T1_USD_PATH,
            activate_contact_sensors=True,
            rigid_props=sim_utils.RigidBodyPropertiesCfg(
                disable_gravity=False,
                retain_accelerations=False,
                linear_damping=0.0,
                angular_damping=0.0,
                max_linear_velocity=1000.0,
                max_angular_velocity=1000.0,
                max_depenetration_velocity=1.0,
            ),
            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
                enabled_self_collisions=True,
                solver_position_iteration_count=4,
                solver_velocity_iteration_count=0
            ),
        ),
        init_state=ArticulationCfg.InitialStateCfg(
            pos=(0.0, 0.0, 0.7),    # initial base height so the feet touch the ground without interpenetration
            joint_pos={".*": 0.0},  # all joints start at angle 0
        ),
        actuators={
            "legs": ImplicitActuatorCfg(
                joint_names_expr=[".*"],  # match every joint; can be narrowed, e.g. ["L_Hip.*", "R_Hip.*"]
                effort_limit=400.0,
                velocity_limit=10.0,
                stiffness=85.0,  # P gain
                damping=2.0,     # D gain
            ),
        },
    )

101
rl_game/demo/train.py Normal file
View File

@@ -0,0 +1,101 @@
import argparse
import glob
import os
import sys
# Make modules that live next to this script importable regardless of the CWD.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from isaaclab.app import AppLauncher

# 1. CLI arguments for the launcher and the training run.
parser = argparse.ArgumentParser(description="Train T1 robot to Get-Up with RL-Games.")
parser.add_argument("--num_envs", type=int, default=16384, help="起身任务建议并行 4096 即可")
parser.add_argument("--task", type=str, default="Isaac-T1-GetUp-v0", help="任务 ID")
parser.add_argument("--seed", type=int, default=42, help="随机种子")
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()

# 2. Start the simulator (must happen BEFORE importing any other isaaclab modules).
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
# These imports must come after the AppLauncher above has started the simulator.
import torch
import gymnasium as gym
import yaml

from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
from rl_games.torch_runner import Runner
from rl_games.common import env_configurations, vecenv

# Import the environment configuration class (t1_env_cfg.py, class T1EnvCfg).
from config.t1_env_cfg import T1EnvCfg

# 3. Register the training environment.
# NOTE(review): three different task IDs appear in this project:
# "Isaac-T1-Walk-v0" (here), "Isaac-T1-Walking-v0" (package __init__), and
# the --task default "Isaac-T1-GetUp-v0" (never read below) — confirm intent.
gym.register(
    id="Isaac-T1-Walk-v0",
    entry_point="isaaclab.envs:ManagerBasedRLEnv",
    kwargs={
        "cfg": T1EnvCfg(),  # loads the reward/termination/command settings defined in the cfg
    },
)
def main():
    """Build the T1 environment, wire it into rl_games, and run PPO training.

    If a checkpoint exists under logs/T1_GetUp/nn/, training resumes from the
    most recently written one; otherwise it starts from scratch.
    """
    # --- Resume support ---------------------------------------------------
    # BUG FIX: the original code passed the literal glob pattern
    # "logs/T1_GetUp/nn/**.pth" to os.path.exists(), which does NOT expand
    # patterns, so resume could never trigger (and if it somehow did, the raw
    # pattern would be handed to rl_games as a path). Expand the pattern and
    # pick the newest checkpoint by modification time instead.
    ckpt_pattern = os.path.join(os.path.dirname(__file__), "logs", "T1_GetUp", "nn", "*.pth")
    checkpoints = glob.glob(ckpt_pattern)
    checkpoint_path = max(checkpoints, key=os.path.getmtime) if checkpoints else None
    should_retrain = checkpoint_path is not None

    env = gym.make("Isaac-T1-Walk-v0", num_envs=args_cli.num_envs)
    # rl_device must match the simulator's device (typically "cuda:0").
    wrapped_env = RlGamesVecEnvWrapper(
        env,
        rl_device=args_cli.device,
        clip_obs=5.0,
        clip_actions=1.0
    )

    # Register the already-constructed vec-env with rl_games so the YAML key
    # `env_name: rlgym` resolves to it.
    vecenv.register('as_is', lambda config_name, num_actors, **kwargs: wrapped_env)
    env_configurations.register('rlgym', {
        'vecenv_type': 'as_is',
        'env_creator': lambda **kwargs: wrapped_env
    })

    # Load the rl_games hyperparameter config.
    config_path = os.path.join(os.path.dirname(__file__), "config", "ppo_cfg.yaml")
    with open(config_path, "r") as f:
        rl_config = yaml.safe_load(f)

    # Log directory and experiment name.
    rl_game_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
    log_dir = os.path.join(rl_game_dir, "logs")
    rl_config['params']['config']['train_dir'] = log_dir
    rl_config['params']['config']['name'] = "T1_GetUp"

    # Inject the checkpoint path so rl_games resumes training from it.
    if should_retrain:
        print(f"[INFO]: 检测到预训练模型,正在从 {checkpoint_path} 恢复训练...")
        # rl_games reads load_path from the config to continue training.
        rl_config['params']['config']['load_path'] = checkpoint_path
    else:
        print("[INFO]: 未找到预训练模型,将从零开始训练。")

    # Run training.
    runner = Runner()
    runner.load(rl_config)
    runner.run({
        "train": True,
        "play": False,
        # Also pass the checkpoint explicitly so the runner restores weights.
        "checkpoint": checkpoint_path if should_retrain else None,
        "vec_env": wrapped_env
    })
    simulation_app.close()


if __name__ == "__main__":
    main()