The demo of get up

This commit is contained in:
2026-03-16 05:00:20 -04:00
parent 1b4c0913b7
commit 4b0b1fac8d
7 changed files with 341 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
params:
seed: 42
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [512, 256, 128]
activation: relu
d2rl: False
initializer:
name: default
config:
name: T1_Walking
env_name: rlgym # Isaac Lab 包装器
multi_gpu: False
ppo: True
mixed_precision: True
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: 16384 # 同时训练的机器人数量
reward_shaper:
scale_value: 1.0
normalize_advantage: True
gamma: 0.96
tau: 0.95
learning_rate: 5e-4
lr_schedule: adaptive
kl_threshold: 0.013
score_to_win: 20000
max_epochs: 5000
save_best_after: 50
save_frequency: 100
grad_norm: 1.0
entropy_coef: 0.02
truncate_grads: True
bounds_loss_coef: 0.001
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 4
critic_coef: 2
clip_value: True

View File

@@ -0,0 +1,116 @@
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg
import isaaclab.envs.mdp as mdp
@configclass
class T1ObservationCfg:
"""观察值:必须包含姿态和高度,否则机器人不知道自己是怎么躺着的"""
@configclass
class PolicyCfg(ObsGroup):
concatenate_terms = True
enable_corruption = False
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
projected_gravity = ObsTerm(func=mdp.projected_gravity)
# ⬅️ 新增:让机器人知道自己离地多高
root_height = ObsTerm(func=mdp.root_height)
joint_pos = ObsTerm(func=mdp.joint_pos_rel)
joint_vel = ObsTerm(func=mdp.joint_vel_rel)
actions = ObsTerm(func=mdp.last_action)
policy = PolicyCfg()
@configclass
class T1EventCfg:
"""事件配置:实现多种随机躺下姿态的关键"""
# 每次重置时,随机给基座一个旋转角度
# 包括:背躺 (Pitch=1.57), 趴着 (Pitch=-1.57), 侧躺 (Roll=1.57)
reset_robot_rotation = EventTerm(
func=mdp.reset_root_custom_orientation,
params={
"euler_angles_range": {
"roll": (-1.57, 1.57), # 覆盖侧躺
"pitch": (-1.57, 1.57), # 覆盖趴下和背躺
"yaw": (-3.14, 3.14), # 随机朝向
},
"asset_cfg": SceneEntityCfg("robot"),
},
mode="reset",
)
@configclass
class T1GetUpRewardCfg:
"""起身任务专用奖励:快、直、稳"""
# 1. 核心进度:高度奖励
# 只要低于目标高度(0.6m)就会持续根据距离给负分越高分数越接近0
height_progress = RewTerm(
func=mdp.root_height_below_minimum,
weight=15.0,
params={"minimum_height": 0.65}
)
# 2. 速度惩罚:时间就是金钱
# 每一帧都扣 0.5 分,起身越慢,被扣掉的总分就越多
time_penalty = RewTerm(
func=mdp.is_alive,
weight=-0.5
)
# 3. 姿态奖励:上半身必须垂直向上
upright = RewTerm(func=mdp.flat_orientation_l2, weight=2.0)
# 4. 平滑惩罚:防止起身时由于急躁导致的剧烈抖动
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.001)
@configclass
class T1TerminationsCfg:
"""终止条件"""
# 提前成功:如果站起来了并保持稳定,直接结束 Episode 并拿奖励,节省算力
is_standing_success = DoneTerm(
func=mdp.root_height_below_minimum,
params={"minimum_height": 0.65, "inverted": True} # 高于 0.65m 则重置
)
time_out = DoneTerm(func=mdp.time_out)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
"""主环境配置:针对 T1 起身任务"""
scene = T1SceneCfg(num_envs=16384, env_spacing=2.5)
# 覆盖初始位置:必须低高度生成,防止随机旋转后高空坠落
def __post_init__(self):
super().__post_init__()
self.scene.robot.init_state.pos = (0.0, 0.0, 0.2)
observations = T1ObservationCfg()
rewards = T1GetUpRewardCfg()
terminations = T1TerminationsCfg()
events = T1EventCfg() # ⬅️ 挂载随机姿态事件
actions = JointPositionActionCfg(
asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
)
# 训练参数
episode_length_s = 5.0 # 强制 5秒内必须站起来
decimation = 4 # 提高控制频率 (约 50Hz) 有助于精细动作