Files
Gym_GPU/rl_game/demo/config/ppo_cfg.yaml
2026-03-15 20:14:06 -04:00

60 lines
1.2 KiB
YAML

---
# rl_games PPO (a2c_continuous) training configuration.
# Structure follows the standard rl_games schema:
#   params.algo / params.model / params.network / params.config
params:
  seed: 42

  algo:
    name: a2c_continuous  # rl_games continuous-action A2C/PPO agent

  model:
    name: continuous_a2c_logstd  # Gaussian policy with learned log-std

  network:
    name: actor_critic
    separate: false  # actor and critic share the MLP trunk
    space:
      continuous:
        # 'None' is an rl_games activation-factory key (identity activation);
        # it must stay the plain string None, NOT YAML null.
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0  # log-std init 0 -> initial sigma = exp(0) = 1
        fixed_sigma: true  # state-independent sigma (single learned parameter)
    mlp:
      units: [512, 256, 128]
      activation: elu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true  # bootstrap value on timeout-terminated episodes
    num_actors: 16384  # number of robots trained in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.99
    tau: 0.95  # GAE lambda
    # Written with a decimal point: PyYAML's YAML 1.1 resolver parses
    # exponent floats without a '.' (e.g. 3e-4) as strings, not floats.
    learning_rate: 3.0e-4
    lr_schedule: adaptive  # adapt LR to hold KL near kl_threshold
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: true
    bounds_loss_coef: 0.0
    e_clip: 0.2  # PPO clipping epsilon
    horizon_length: 128
    # Rollout batch = num_actors * horizon_length = 16384 * 128 = 2,097,152;
    # must be divisible by minibatch_size (2,097,152 / 32,768 = 64 minibatches).
    minibatch_size: 32768
    mini_epochs: 5
    critic_coef: 2
    clip_value: true