---
# Training configuration rooted at `params`.
#
# NOTE(review): the original file had every key on a single physical line,
# which is not parseable YAML and also caused the first inline comment to
# swallow all settings after `env_name`. The nesting below was reconstructed
# from key order and the usual rl_games config layout (algo / model /
# network / config) - confirm against the consuming trainer.
params:
  seed: 42

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false
    space:
      continuous:
        # Plain `None` is loaded by YAML as the string "None", not null.
        # Presumably the consumer looks activations up by that name - verify
        # before changing it to `null`.
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: true
    mlp:
      units: [512, 256, 128]
      activation: elu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true
    num_actors: 16384  # number of robots trained simultaneously
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.99
    tau: 0.95
    # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve
    # it as a float; a bare `3e-4` is loaded as the string "3e-4" there.
    learning_rate: 3.0e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: true
    bounds_loss_coef: 0.0
    e_clip: 0.2
    # num_actors * horizon_length = 2,097,152 samples per rollout, which is
    # an exact multiple (64x) of minibatch_size.
    horizon_length: 128
    minibatch_size: 32768
    mini_epochs: 5
    critic_coef: 2
    clip_value: true