---
# rl_games training configuration: continuous-action A2C/PPO agent
# ("T1_Walking" locomotion task, run through the Isaac Lab rlgym wrapper).
params:
  seed: 42

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false  # actor and critic share the MLP trunk
    space:
      continuous:
        # NOTE(review): the plain scalar `None` parses as the *string* "None",
        # which rl_games interprets as "no activation" — do not change to null.
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 1.2
        fixed_sigma: false  # log-std is a learned network output
    mlp:
      units: [512, 256, 128]
      activation: relu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true  # use the PPO clipped-surrogate objective
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true
    num_actors: 8192  # number of robots trained in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.96
    tau: 0.95
    # Written as 3.0e-4 (not 3e-4): YAML 1.1 resolvers such as PyYAML require
    # a decimal point for exponent notation to parse as a float, otherwise
    # the value silently loads as the string "3e-4".
    learning_rate: 3.0e-4
    lr_schedule: adaptive  # adapt LR to hold KL near kl_threshold
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 200
    save_best_after: 50
    save_frequency: 100
    grad_norm: 0.5
    entropy_coef: 0.008
    truncate_grads: true
    bounds_loss_coef: 0.001
    e_clip: 0.2
    horizon_length: 128
    minibatch_size: 8192  # must divide num_actors * horizon_length
    mini_epochs: 4
    critic_coef: 1
    clip_value: true