# Training configuration for the "T1_Walking" run.
#
# Everything lives under the single top-level `params` mapping:
#   algo    - which training algorithm to instantiate
#   model   - which policy/value model wrapper to use
#   network - network architecture (action-space head + MLP trunk)
#   config  - run-level and optimisation hyperparameters
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# stripped; the nesting below was reconstructed from the key order and blank
# lines. Confirm it against the consumer's expected schema.
params:
  seed: 42
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false
    space:
      continuous:
        # `None` is a plain string in YAML, not null. It is kept verbatim;
        # presumably the consumer looks up an activation by this name, so do
        # not replace it with `null` without checking.
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: true
    mlp:
      units: [512, 256, 128]
      activation: relu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true
    num_actors: 16384  # number of robots trained simultaneously
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.96
    tau: 0.95
    # Written with a decimal point so that YAML 1.1 loaders (e.g. PyYAML)
    # resolve it as a float; a bare `5e-4` is loaded as a string there.
    learning_rate: 5.0e-4
    lr_schedule: adaptive
    kl_threshold: 0.013
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0
    entropy_coef: 0.02
    truncate_grads: true
    bounds_loss_coef: 0.001
    e_clip: 0.2
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 4
    critic_coef: 2
    clip_value: true