Files
Gym_GPU/rl_game/get_up/config/ppo_cfg.yaml
2026-03-18 06:18:29 -04:00

60 lines
1.2 KiB
YAML

# rl_games PPO (a2c_continuous) training config — T1 humanoid walking task.
params:
  seed: 42

  algo:
    name: a2c_continuous  # rl_games actor-critic for continuous action spaces

  model:
    name: continuous_a2c_logstd  # Gaussian policy with learnable log-std

  network:
    name: actor_critic
    separate: false  # actor and critic share the MLP trunk
    space:
      continuous:
        # 'None' is the literal string rl_games uses for identity activation —
        # keep it quoted so no YAML parser ever turns it into null.
        mu_activation: 'None'
        sigma_activation: 'None'
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0  # log-std initialized to 0 -> initial sigma = 1
        fixed_sigma: true  # state-independent sigma (one learnable vector)
    mlp:
      units: [512, 256, 128]
      activation: relu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true  # bootstrap value estimate on episode timeouts
    num_actors: 32768  # number of robots simulated in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.96  # discount factor
    tau: 0.95  # GAE lambda (rl_games calls it tau)
    learning_rate: 5e-4
    lr_schedule: adaptive  # adapts LR to keep KL near kl_threshold
    kl_threshold: 0.013
    score_to_win: 20000
    max_epochs: 500000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0  # gradient-norm clipping (with truncate_grads)
    entropy_coef: 0.02
    truncate_grads: true
    bounds_loss_coef: 0.001  # penalty for actions outside bounds
    e_clip: 0.2  # PPO clipping epsilon
    horizon_length: 32  # rollout steps per env per update
    # batch = horizon_length * num_actors = 1,048,576; must be divisible
    # by minibatch_size (1,048,576 / 16,384 = 64 minibatches).
    minibatch_size: 16384
    mini_epochs: 4
    critic_coef: 2
    clip_value: true