---
# rl_games A2C/PPO training configuration for the T1_Walking task
# (continuous-action actor-critic with a learned log-std policy).
params:
  seed: 42

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false  # actor and critic share the MLP trunk
    space:
      continuous:
        # 'None' is the rl_games activation-factory key for "no activation";
        # quoted to make explicit that a string (not a YAML null) is intended.
        mu_activation: 'None'
        sigma_activation: 'None'
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 1.2
        fixed_sigma: false  # log-std is a learned parameter, not held constant
    mlp:
      units: [512, 256, 128]
      activation: relu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true
    num_actors: 8192  # number of robots trained in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.96
    tau: 0.95
    # Written with an explicit dot: YAML 1.1 resolvers (PyYAML, used by
    # rl_games) parse bare '3e-4' as the STRING "3e-4", not a float.
    learning_rate: 3.0e-4
    lr_schedule: adaptive
    kl_threshold: 0.008  # adaptive-LR target KL
    score_to_win: 20000
    max_epochs: 200
    save_best_after: 50
    save_frequency: 100
    grad_norm: 0.5
    entropy_coef: 0.008
    truncate_grads: true
    bounds_loss_coef: 0.001
    e_clip: 0.2
    horizon_length: 128
    # 8192 divides num_actors * horizon_length (8192 * 128) evenly,
    # as rl_games requires.
    minibatch_size: 8192
    mini_epochs: 4
    critic_coef: 1
    clip_value: true